/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.resolver.lucene;
// Java 2 standard packages
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import static org.mulgara.resolver.lucene.LuceneResolver.LUCENE_VERSION;
/**
* The utility class which provides an interface of adding, finding and removing
* statements and documents for Lucene.
*
* @created 2002-03-15
*
* @author <a href="http://staff.pisoftware.com/tate">Tate Jones</a>
* @author Andrew Newman
*
* @version $Revision: 1.8 $
*
* @modified $Date: 2005/01/05 04:58:47 $ by $Author: newmana $
*
* @maintenanceAuthor $Author: newmana $
*
* @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
*
 * @copyright &copy; 2001-2003 <A href="http://www.PIsoftware.com/">Plugged In
* Software Pty Ltd</A>
*
* @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
*/
public class FullTextStringIndex {
/** Logger. This is named after the class. */
private final static Logger logger = Logger.getLogger(FullTextStringIndex.class);
//
// Constants
//
/** The field name for the composite document key (hash of subject, predicate and literal); used for exact-match removal */
public final static String ID_KEY = "id";
/** The field name for the analyzed (tokenized) literal */
public final static String LITERAL_KEY = "stemmedliteral";
/** The field name for the actual subject (not tokenized) */
public final static String SUBJECT_KEY = "subject";
/** The field name for the actual predicate (not tokenized) */
public final static String PREDICATE_KEY = "predicate";
/** The field name for the reversed literal, used to support leading-wildcard searches */
public final static String REVERSE_LITERAL_KEY = "reverseliteral";
//
// Fields
//
/** The lucene indexer cache from which the reader/writer are borrowed and to which they are returned */
private LuceneIndexerCache indexerCache;
/** The index writer; null when opened read-only or after close() */
private IndexWriter indexer;
/** The index searcher; null after close() */
private IndexSearcher indexSearcher;
/** Whether any modifications have been made to the index; reported to the cache on commit. */
private boolean madeMods = false;
/** Whether to close (rather than recycle) the indexers when returning them to the cache */
private boolean closeIndexers = false;
/** Enable reverse literal lookup (maintains the reverse-literal field on add) */
private boolean enableReverseTextIndex;
/** Analyzer used for writing and reading */
private Analyzer analyzer = getAnalyzer();
/**
 * Create a new FullTextStringIndex object. Uses the system property
 * "mulgara.textindex.reverse.enabled" to set the desired value for
 * enableReverseTextIndex, or will default to "false" if not set.
 *
 * @param indexerCache the indexer-cache to use to get the indexers
 * @param forWrites whether writes will occur or not
 * @throws FullTextStringIndexException on failure to obtain an index reader or writer
 */
public FullTextStringIndex(LuceneIndexerCache indexerCache, boolean forWrites)
    throws FullTextStringIndexException {
  // Boolean.getBoolean returns false when the property is unset or not "true".
  this(indexerCache, forWrites, Boolean.getBoolean("mulgara.textindex.reverse.enabled"));
}
/**
 * Create a new FullTextStringIndex object.
 *
 * @param indexerCache the indexer-cache to use to get the indexers
 * @param forWrites whether to acquire an index writer
 * @param enableReverseTextIndex true if you can begin Lucene queries with wildcards;
 *        this maintains an extra reversed-literal field on every added document
 * @throws FullTextStringIndexException on failure to obtain an index reader or writer
 */
public FullTextStringIndex(LuceneIndexerCache indexerCache, boolean forWrites,
    boolean enableReverseTextIndex)
    throws FullTextStringIndexException {
  this.indexerCache = indexerCache;
  this.enableReverseTextIndex = enableReverseTextIndex;
  // Acquire the writer (if requested) and the reader from the cache.
  initialize(forWrites);
}
/**
 * Get an instance of the analyzer used on text to produce the index.
 *
 * @return a new {@link StandardAnalyzer} configured with the resolver's Lucene version
 */
public static Analyzer getAnalyzer() {
  return new StandardAnalyzer(LUCENE_VERSION);
}
/**
 * Determine whether the literal search string contains a leading wildcard —
 * either at the start of the whole string, or at the start of any later term
 * (optionally prefixed by a '+' or '-' operator).
 *
 * @param literal the Lucene search string to examine
 * @return true if any term in the search string begins with '*' or '?'
 */
private static boolean isLeadingWildcard(String literal) {
  if (literal.startsWith("?") || literal.startsWith("*")) {
    return true;
  }
  // A wildcard is also "leading" when it starts a subsequent term, i.e. when it
  // directly follows a space or a '+'/'-' constraint operator.
  for (String prefix : new String[] { " ", "-", "+" }) {
    if (literal.contains(prefix + "*") || literal.contains(prefix + "?")) {
      return true;
    }
  }
  return false;
}
/**
 * Create a key to uniquely identify a triple; used for performing deletions.
 * TODO : hashcode is not the most appropriate technique. In future change to
 * MD5 sum.
 *
 * @param subject the subject of the triple
 * @param predicate the predicate of the triple
 * @param literal the literal of the triple
 * @return the decimal hash codes of subject, predicate and literal concatenated
 */
private static String createKey(String subject, String predicate, String literal) {
  StringBuilder key = new StringBuilder();
  key.append(subject.hashCode());
  key.append(predicate.hashCode());
  key.append(literal.hashCode());
  return key.toString();
}
/**
 * Reverse the literal search string while ensuring that any '+' and '-'
 * constraint operators remain prefixed to their (now reversed) terms.
 *
 * @param literal the search string to reverse
 * @return the reversed search string, suitable for the reverse-literal field
 */
private static String reverseLiteralSearch(String literal) {
  String newReversedString;
  if (literal.indexOf("+") < 0 && literal.indexOf("-") < 0) {
    // No constraint operators: a simple whole-string reverse suffices.
    newReversedString = new StringBuilder(literal).reverse().toString();
  } else {
    // Reverse term-by-term (emitting terms in reverse order) so the operators
    // can be re-attached as prefixes.
    String[] tokens = literal.split(" ");
    StringBuilder searchReversed = new StringBuilder();
    for (int i = tokens.length - 1; i >= 0; i--) {
      StringBuilder word = new StringBuilder(tokens[i]).reverse();
      // After reversal a leading '+'/'-' ends up at the end; move it back to the front.
      char lastChar = word.charAt(word.length() - 1);
      if (lastChar == '+' || lastChar == '-') {
        word.deleteCharAt(word.length() - 1).insert(0, lastChar);
      }
      searchReversed.append(word);
      if (i > 0) {
        searchReversed.append(" ");
      }
    }
    newReversedString = searchReversed.toString();
  }
  if (logger.isDebugEnabled()) {
    logger.debug("Reversed literal search from : " + literal + " to " + newReversedString);
  }
  return newReversedString;
}
/**
 * Add a subject, predicate and literal into the fulltext string pool. The
 * {@link StandardAnalyzer} determines how the literal field is tokenized.
 *
 * @param subject the subject to be added
 * @param predicate the predicate to be added
 * @param literal literal to be analyzed for fulltext searching
 * @return true if the statement was indexed; false if the literal was empty or whitespace
 * @throws FullTextStringIndexException if a value is missing or the index write fails
 */
public boolean add(String subject, String predicate, String literal) throws FullTextStringIndexException {
  if (literal == null ||
      subject == null || subject.length() == 0 ||
      predicate == null || predicate.length() == 0) {
    throw new FullTextStringIndexException("Subject, predicate or literal has " +
        "not been supplied a value");
  }
  // An empty (whitespace-only) literal is silently skipped rather than indexed.
  if (literal.trim().length() == 0) {
    if (logger.isDebugEnabled()) logger.debug("Ignoring empty literal");
    return false;
  }
  if (logger.isDebugEnabled()) {
    logger.debug("Adding subject <" + subject + "> predicate <" + predicate + "> literal <'" +
        literal + "'> to fulltext string index");
  }
  Document indexDocument = new Document();
  // Tokenized literal, for fulltext searching.
  indexDocument.add(new Field(LITERAL_KEY, literal, Field.Store.YES, Field.Index.ANALYZED));
  // Tokenized reversed literal, enabling leading-wildcard searches.
  if (enableReverseTextIndex) {
    indexDocument.add(new Field(REVERSE_LITERAL_KEY,
        (new StringBuilder(literal).reverse()).toString(), Field.Store.YES, Field.Index.ANALYZED));
  }
  // Untokenized composite key, required for exact matching (i.e. removal).
  indexDocument.add(new Field(ID_KEY,
      createKey(subject, predicate, literal), Field.Store.YES, Field.Index.NOT_ANALYZED));
  // Untokenized predicate and subject, required for exact matching.
  indexDocument.add(new Field(PREDICATE_KEY, predicate, Field.Store.YES, Field.Index.NOT_ANALYZED));
  indexDocument.add(new Field(SUBJECT_KEY, subject, Field.Store.YES, Field.Index.NOT_ANALYZED));
  try {
    indexer.addDocument(indexDocument, analyzer);
    madeMods = true;
  } catch (IOException ex) {
    // A failed writer must not be recycled by the cache.
    closeIndexers = true;
    logger.error("Unable to add fulltext string subject <" + subject + "> predicate <" +
        predicate + "> literal <'" + literal + "'> to fulltext string index", ex);
    throw new FullTextStringIndexException(
        "Unable to add fulltext string subject <" + subject + "> predicate <" +
        predicate + "> literal <'" + literal + "'> to fulltext string index",
        ex);
  }
  return true;
}
/**
 * Add a subject, predicate and external resource into the fulltext string pool.
 * The text supplied by the reader is tokenized by the {@link StandardAnalyzer}.
 *
 * @param subject the subject to be added
 * @param predicate the predicate to be added
 * @param resource label of the resource being indexed; used in the document key
 * @param reader stream containing text of the resource; consumed by the indexer
 * @return <code>true</code> if successful
 * @throws FullTextStringIndexException if a parameter is missing or the index write fails
 */
public boolean add(String subject, String predicate, String resource, Reader reader)
    throws FullTextStringIndexException {
  // Validate all parameters up front.
  if (subject == null || subject.length() == 0) {
    throw new FullTextStringIndexException("No \"subject\" parameter");
  }
  if (predicate == null || predicate.length() == 0) {
    throw new FullTextStringIndexException("No \"predicate\" parameter");
  }
  if (resource == null || resource.length() == 0) {
    throw new FullTextStringIndexException("No \"resource\" parameter");
  }
  if (reader == null) {
    throw new FullTextStringIndexException("Null \"reader\" parameter");
  }
  if (logger.isDebugEnabled()) {
    logger.debug("Adding subject <" + subject + "> predicate <" + predicate + "> resource <" +
        resource + "> to fulltext string index");
  }
  Document indexDocument = new Document();
  // Tokenized resource content for fulltext searching, streamed from the reader.
  indexDocument.add(new Field(LITERAL_KEY, reader));
  // Untokenized composite key, required for exact matching (i.e. removal).
  indexDocument.add(new Field(ID_KEY,
      createKey(subject, predicate, resource), Field.Store.YES, Field.Index.NOT_ANALYZED));
  // Untokenized predicate and subject, required for exact matching.
  indexDocument.add(new Field(PREDICATE_KEY, predicate, Field.Store.YES, Field.Index.NOT_ANALYZED));
  indexDocument.add(new Field(SUBJECT_KEY, subject, Field.Store.YES, Field.Index.NOT_ANALYZED));
  try {
    indexer.addDocument(indexDocument, analyzer);
    madeMods = true;
  } catch (IOException ex) {
    // A failed writer must not be recycled by the cache.
    closeIndexers = true;
    logger.error("Unable to add fulltext string subject <" + subject + "> predicate <" +
        predicate + "> resource <" + resource + "> to fulltext string index", ex);
    throw new FullTextStringIndexException(
        "Unable to add fulltext string subject <" + subject + "> predicate <" +
        predicate + "> resource <" + resource + "> to fulltext string index",
        ex);
  }
  return true;
}
/**
 * Add a pre-built document into the fulltext string pool. The constants
 * {@link #SUBJECT_KEY}, {@link #PREDICATE_KEY} and {@link #LITERAL_KEY} should
 * be used for the document's fields if the index is to be read by queries.
 *
 * @param indexDocument The document to be indexed.
 * @return true if successful
 * @throws FullTextStringIndexException if the document is null or cannot be written
 */
public boolean add(Document indexDocument) throws FullTextStringIndexException {
  if (indexDocument == null) {
    throw new FullTextStringIndexException("The document to be indexed was null.");
  }
  if (logger.isDebugEnabled()) {
    logger.debug("Adding document " + indexDocument + " to fulltext string index");
  }
  try {
    indexer.addDocument(indexDocument, analyzer);
    madeMods = true;
  } catch (IOException ex) {
    // A failed writer must not be recycled by the cache.
    closeIndexers = true;
    logger.error("Unable to add " + indexDocument + " to fulltext string index", ex);
    throw new FullTextStringIndexException("Unable to add " + indexDocument + " to fulltext string index", ex);
  }
  return true;
}
/**
 * Remove the exact string from the fulltext string pool.
 *
 * @param subject subject must be supplied
 * @param predicate predicate must be supplied
 * @param literal literal must be supplied
 * @return true if the delete request was issued successfully (Lucene does not
 *         report whether a matching document actually existed)
 * @throws FullTextStringIndexException if an IOException occurs on index modification
 */
public boolean remove(String subject, String predicate, String literal)
    throws FullTextStringIndexException {
  if (literal == null ||
      subject == null || subject.length() == 0 ||
      predicate == null || predicate.length() == 0) {
    throw new FullTextStringIndexException("Subject, predicate or literal has not been supplied a value");
  }
  // Create the composite key identifying the triple to delete.
  String key = createKey(subject, predicate, literal);
  try {
    indexer.deleteDocuments(new Term(ID_KEY, key));
    // Note: deleteDocuments does not say whether anything matched; the old code
    // set `removed = true` unconditionally and then tested it, leaving a dead
    // (and unreachable) "unable to remove" debug branch.
    // TODO: could use docCount() to verify, but that seems overly expensive.
    madeMods = true;
    if (logger.isDebugEnabled()) {
      logger.debug("Removed key '" + key + "' from fulltext string pool");
    }
  } catch (IOException ex) {
    // A failed writer must not be recycled by the cache.
    closeIndexers = true;
    logger.error("Unable to delete the string '" + key + "'", ex);
    throw new FullTextStringIndexException("Unable to delete the string '" + key + "'", ex);
  }
  return true;
}
/**
 * Remove all entries in the string pool. Unlike {@link LuceneIndexerCache#removeAllIndexes},
 * this may be called while readers are active. However, this method may be very slow. Also
 * note that this will <strong>not</strong> remove entries that have been added as part of the
 * current transaction!
 *
 * @throws FullTextStringIndexException Exception occurs when attempting to remove the documents
 */
public void removeAll() throws FullTextStringIndexException {
  // debug logging
  if (logger.isDebugEnabled()) {
    logger.debug("Removing all documents from " + indexerCache.getDirectory());
  }
  try {
    // Delete by query rather than removing the index files, so open readers stay valid.
    indexer.deleteDocuments(new MatchAllDocsQuery());
    madeMods = true;
  } catch (IOException ex) {
    // A failed writer must not be recycled by the cache.
    closeIndexers = true;
    logger.error("Unable to delete all documents", ex);
    throw new FullTextStringIndexException("Unable to delete all documents", ex);
  }
}
/**
 * Close this index and return the indexers to the cache. Safe to call more than
 * once; subsequent calls are no-ops. The reader is returned to the cache even
 * if returning the writer fails.
 */
public void close() {
  if (logger.isDebugEnabled()) {
    logger.debug("Closing fulltext indexes");
  }
  try {
    if (indexer != null) {
      indexerCache.returnWriter(indexer, closeIndexers);
      indexer = null;
    }
  } finally {
    // Previously, an exception from returnWriter skipped this entirely, leaking
    // the reader reference.
    if (indexSearcher != null) {
      IndexReader reader = indexSearcher.getIndexReader();
      indexSearcher = null;
      indexerCache.returnReader(reader, closeIndexers);
    }
  }
}
/**
 * Optimize the index and then flush it to disk. A no-op when this index was
 * opened without a writer.
 *
 * @throws FullTextStringIndexException If there was a problem reading from or writing to the disk.
 */
public void optimize() throws FullTextStringIndexException {
  // Nothing to optimize without a writer.
  if (indexer == null) return;
  if (logger.isInfoEnabled()) {
    logger.info("Optimizing fulltext index at " + indexerCache.getDirectory() + " please wait...");
  }
  try {
    indexer.optimize();
  } catch (IOException ex) {
    // A failed writer must not be recycled by the cache.
    closeIndexers = true;
    logger.error("Unable to optimize existing fulltext string pool index", ex);
    throw new FullTextStringIndexException("Unable to optimize existing fulltext string pool index", ex);
  }
}
/**
 * Find a string within the fulltext string pool. The search is based on the
 * {@link StandardAnalyzer} used to add the string.
 *
 * @param subject exact subject to match; may be null for any subject
 * @param predicate exact predicate to match; may be null for any predicate
 * @param literal literal query, parsed by the Lucene query parser; may be null
 * @return Object containing the hits
 * @throws FullTextStringIndexException if the literal cannot be parsed or the index read fails
 */
public Hits find(String subject, String predicate, String literal) throws FullTextStringIndexException {
  Query query;
  if (subject == null && predicate == null && literal == null) {
    // Completely unconstrained: match everything.
    query = new MatchAllDocsQuery();
  } else {
    BooleanQuery bQuery = new BooleanQuery();
    query = bQuery;
    if (logger.isDebugEnabled()) {
      logger.debug("Searching the fulltext string index pool with subject :" + subject +
          " predicate :" + predicate + " literal :" + literal);
    }
    if (subject != null) {
      TermQuery tSubject = new TermQuery(new Term(SUBJECT_KEY, subject));
      if (literal != null) tSubject.setBoost(0); // if scoring, don't affect the score
      bQuery.add(tSubject, BooleanClause.Occur.MUST);
    }
    if (predicate != null) {
      TermQuery tPredicate = new TermQuery(new Term(PREDICATE_KEY, predicate));
      if (literal != null) tPredicate.setBoost(0); // if scoring, don't affect the score
      bQuery.add(tPredicate, BooleanClause.Occur.MUST);
    }
    if (literal != null) {
      // Leading wildcards are searched against the reversed-literal field (when
      // enabled), with the query string itself reversed to match.
      boolean useReverseField = enableReverseTextIndex && isLeadingWildcard(literal);
      Query qliteral;
      try {
        if (useReverseField) {
          literal = reverseLiteralSearch(literal);
          qliteral = new QueryParser(LUCENE_VERSION, REVERSE_LITERAL_KEY, analyzer).parse(literal);
        } else {
          qliteral = new QueryParser(LUCENE_VERSION, LITERAL_KEY, analyzer).parse(literal);
        }
      } catch (ParseException ex) {
        logger.error("Unable to parse query '" + literal + "'", ex);
        throw new FullTextStringIndexException("Unable to parse query '" + literal + "'", ex);
      }
      bQuery.add(qliteral, BooleanClause.Occur.MUST);
      // Log the query against the field that was actually searched. (The old debug
      // check re-tested the literal *after* it had been reversed, and with a
      // different condition than the one above, so it could report the wrong field.)
      if (logger.isDebugEnabled()) {
        logger.debug("Searching the fulltext string index pool with parsed query as " +
            bQuery.toString(useReverseField ? REVERSE_LITERAL_KEY : LITERAL_KEY));
      }
    }
  }
  // Perform query
  return find(query);
}
/**
 * Execute a query against the string pool. The constants {@link #SUBJECT_KEY},
 * {@link #PREDICATE_KEY} and {@link #LITERAL_KEY} should be used in the query to
 * reference the relevant index fields. Use the method {@link #getAnalyzer()} to
 * get the analyzer used by this class.
 *
 * @param query The query to execute.
 * @return the collected hits; the caller is responsible for closing them
 * @throws FullTextStringIndexException if the query is null or reading the index fails
 */
public Hits find(Query query) throws FullTextStringIndexException {
  if (query == null) {
    throw new FullTextStringIndexException("The query may not be null.");
  }
  if (logger.isDebugEnabled()) {
    logger.debug("Searching the fulltext string index pool with query " + query.toString(LITERAL_KEY));
  }
  // The Hits constructor incRef's the reader, so on failure we must close it.
  Hits hits = new Hits(indexSearcher.getIndexReader());
  try {
    indexSearcher.search(query, hits);
    if (logger.isDebugEnabled()) {
      logger.debug("Got hits: " + hits.length());
    }
  } catch (IOException ex) {
    closeIndexers = true;
    // Release the reader reference acquired above — the caller never sees these
    // hits, so nobody else can close them. (Previously this was leaked.)
    try {
      hits.close();
    } catch (IOException e2) {
      logger.warn("Unable to close hits after failed search", e2);
    }
    logger.error("Unable to read results for query '" + query.toString(LITERAL_KEY) + "'", ex);
    throw new FullTextStringIndexException("Unable to read results for query '" + query.toString(LITERAL_KEY) + "'", ex);
  }
  return hits;
}
/**
 * The maximum number of documents the given query could return, estimated from
 * term document-frequencies. This is an upper bound, not an exact count.
 *
 * @param subject the subject; may be null
 * @param predicate the predicate; may be null
 * @param object literal to be searched via the analyzer; may be null
 * @return the maximum number of documents; the index's total document count if
 *         no constraint could be applied
 * @throws FullTextStringIndexException if an error occurred
 */
public long getMaxDocs(String subject, String predicate, String object)
    throws FullTextStringIndexException {
  long total = -1; // -1 means "no constraint applied yet"
  try {
    if (subject != null) {
      total = indexSearcher.docFreq(new Term(SUBJECT_KEY, subject));
      if (total == 0) return 0;
    }
    if (predicate != null) {
      // The old code only consulted the predicate frequency when a subject had
      // already set total > 0, so a predicate-only query ignored this constraint.
      long freq = indexSearcher.docFreq(new Term(PREDICATE_KEY, predicate));
      total = (total >= 0) ? Math.min(freq, total) : freq;
      if (total == 0) return 0;
    }
    if (object != null) {
      QueryParser parser = new QueryParser(LUCENE_VERSION, LITERAL_KEY, analyzer);
      // Start from MAX_VALUE when unconstrained so term frequencies can still
      // tighten the bound (Math.min against -1 never could).
      long bound = findMinDocCount(parser.parse(object), (total >= 0) ? total : Long.MAX_VALUE);
      // MAX_VALUE means the query contained no recognizable terms; keep total as-is.
      if (bound != Long.MAX_VALUE) total = bound;
    }
    return (total >= 0) ? total : indexSearcher.maxDoc();
  } catch (IOException ioe) {
    closeIndexers = true;
    throw new FullTextStringIndexException("Unable to count results for query '" + object + "'", ioe);
  } catch (ParseException pe) {
    throw new FullTextStringIndexException("Unable to parse query '" + object + "'", pe);
  }
}
/**
 * Tighten an upper bound on the number of documents a query can match, using the
 * minimum document-frequency over the required terms found in the query.
 *
 * @param q the query to inspect; term, boolean and phrase queries are recognized
 * @param max the current upper bound
 * @return the (possibly) tightened bound; unchanged for unrecognized query types
 * @throws IOException if the document frequencies cannot be read
 */
private long findMinDocCount(Query q, long max) throws IOException {
  long bound = max;
  if (q instanceof TermQuery) {
    bound = Math.min(indexSearcher.docFreq(((TermQuery) q).getTerm()), bound);
  } else if (q instanceof BooleanQuery) {
    // Only required clauses constrain the result size.
    for (BooleanClause clause : ((BooleanQuery) q).getClauses()) {
      if (clause.isRequired()) {
        bound = findMinDocCount(clause.getQuery(), bound);
      }
    }
  } else if (q instanceof PhraseQuery) {
    // Every term of a phrase must occur in each matching document.
    for (Term term : ((PhraseQuery) q).getTerms()) {
      bound = Math.min(indexSearcher.docFreq(term), bound);
    }
  }
  return bound;
}
/**
 * Acquire the indexers from the cache.
 *
 * @param forWrites whether to acquire an index writer in addition to the reader
 * @throws FullTextStringIndexException if an exception occurs while trying to
 *      locate or create the indexes
 */
private void initialize(boolean forWrites) throws FullTextStringIndexException {
  if (forWrites) {
    openWriteIndex();
  }
  try {
    openReadIndex();
  } catch (FullTextStringIndexException e) {
    // Don't keep holding the writer if the reader could not be opened.
    if (indexer != null) {
      logger.debug("Failed to get reader, closing writer");
      try {
        indexerCache.returnWriter(indexer, true);
      } finally {
        indexer = null;
      }
    }
    throw e;
  }
  if (logger.isDebugEnabled()) {
    logger.debug("Fulltext string index initialized");
  }
}
/**
 * Open the index on disk for writing, borrowing the writer from the cache.
 *
 * @throws FullTextStringIndexException if there is an error whilst opening the index.
 */
private void openWriteIndex() throws FullTextStringIndexException {
  try {
    indexer = indexerCache.getWriter();
  } catch (IOException ioe) {
    // A failed writer must not be recycled by the cache.
    closeIndexers = true;
    throw new FullTextStringIndexException("Unable to open fulltext string pool index", ioe);
  }
}
/**
 * Open the index on disk for reading, wrapping a cached reader in a searcher.
 *
 * @throws FullTextStringIndexException if there is an error whilst opening the index.
 */
private void openReadIndex() throws FullTextStringIndexException {
  try {
    indexSearcher = new IndexSearcher(indexerCache.getReader());
  } catch (IOException ioe) {
    // A failed reader must not be recycled by the cache.
    closeIndexers = true;
    throw new FullTextStringIndexException("Unable to open fulltext index for reading", ioe);
  }
}
/**
 * Prepare the pending changes for commit (first phase of the two-phase commit);
 * a no-op when this index has no writer.
 *
 * @throws IOException if the prepare fails
 */
public void prepare() throws IOException {
  if (logger.isDebugEnabled()) {
    logger.debug("Preparing fulltext indexes");
  }
  if (indexer != null) indexer.prepareCommit();
}
/**
 * Roll back any changes made through the writer; a no-op when this index has no
 * writer. The indexers are marked for closing rather than reuse.
 *
 * @throws IOException if the rollback fails
 */
public void rollback() throws IOException {
  if (logger.isDebugEnabled()) {
    logger.debug("Rolling back fulltext indexes");
  }
  // A rolled-back writer must not be returned to the cache for reuse.
  closeIndexers = true;
  if (indexer != null) indexer.rollback();
}
/**
 * Commit any pending changes to the index and, if modifications were made,
 * report the modification to the indexer cache.
 *
 * @throws IOException if the commit fails
 */
public void commit() throws IOException {
  if (logger.isDebugEnabled()) {
    // Fixed: message was misspelled "Comitting".
    logger.debug("Committing fulltext indexes");
  }
  if (indexer != null) {
    indexer.commit();
    // Tell the cache this index changed so it can react accordingly.
    if (madeMods) indexerCache.indexModified(indexer);
  }
}
/**
 * Lucene Hits has been deprecated, so this is our simple version thereof. Since we always
 * read all results, this is more efficient too.
 *
 * <p>TODO: since we collect all hits in memory (just id and score, not actual Document's),
 * we could have problems with very large results. However, jdk 1.5 memory usage seems to be
 * around 20B/entry on 32-bit and 33B/entry on 64-bit systems for the hits array, allowing us
 * to handle 1M hits without too much trouble.
 */
public static class Hits extends Collector implements Cloneable {
  /** The reader documents are fetched from; ref-counted so it stays usable until close(). */
  private final IndexReader reader;
  /** The collected (doc-id, score) pairs. */
  private final List<ScoreDoc> collected = new ArrayList<ScoreDoc>();
  /** Whether close() has already released the reader reference. */
  private boolean closed = false;
  /** Scorer for the segment currently being collected. */
  private Scorer scorer = null;
  /** Offset of the current segment's doc-ids within the whole index. */
  private int docBase = 0;

  public Hits(IndexReader reader) {
    this.reader = reader;
    reader.incRef(); // hold a reference until close()
  }

  /** @return the number of collected hits */
  public final int length() {
    return collected.size();
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return true;
  }

  @Override
  public void collect(int doc) throws IOException {
    float score = scorer.score();
    // Only keep documents that actually scored.
    if (score > 0.0f) {
      collected.add(new ScoreDoc(docBase + doc, score));
    }
  }

  @Override
  public void setNextReader(IndexReader segmentReader, int docBase) throws IOException {
    this.docBase = docBase;
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    this.scorer = scorer;
  }

  /** Load the stored fields (selected by fs) of the n'th hit's document. */
  public final Document doc(int n, FieldSelector fs) throws IOException {
    return reader.document(collected.get(n).doc, fs);
  }

  /** @return the score of the n'th hit */
  public final float score(int n) throws IOException {
    return collected.get(n).score;
  }

  /** Sort the hits by ascending document id. */
  public void sort() {
    Collections.sort(collected, new Comparator<ScoreDoc>() {
      public int compare(ScoreDoc sd1, ScoreDoc sd2) {
        if (sd1.doc < sd2.doc) return -1;
        return (sd1.doc == sd2.doc) ? 0 : +1;
      }
    });
  }

  /** Release the reader reference; safe to call more than once. */
  public void close() throws IOException {
    if (!closed) {
      closed = true;
      reader.decRef();
    }
  }

  public Hits clone() {
    // The clone shares the reader (and the hit list) and must release its own reference.
    reader.incRef();
    try {
      return (Hits) super.clone();
    } catch (CloneNotSupportedException e) {
      throw new Error(getClass() + " doesn't support clone, which it must", e);
    }
  }
}
}