package org.cdlib.xtf.textEngine;

/**
 * Copyright (c) 2004, Regents of the University of California
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * - Neither the name of the University of California nor the names of its
 *   contributors may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Acknowledgements:
 *
 * A significant amount of new and/or modified code in this module
 * was made possible by a grant from the Andrew W. Mellon Foundation,
 * as part of the Melvyl Recommender Project.
 */
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.chunk.DocNumMap;
import org.apache.lucene.chunk.SpanChunkedNotQuery;
import org.apache.lucene.chunk.SparseStringComparator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldSortedHitQueue;
import org.apache.lucene.search.FlippableStringComparator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RecordingSearcher;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SpanHitCollector;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.FieldSpanSource;
import org.apache.lucene.search.spans.SpanNotNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.spelt.SpellReader;
import org.apache.lucene.util.PriorityQueue;
import org.cdlib.xtf.textEngine.facet.DynamicGroupData;
import org.cdlib.xtf.textEngine.facet.FacetSpec;
import org.cdlib.xtf.textEngine.facet.GroupCounts;
import org.cdlib.xtf.textEngine.facet.GroupData;
import org.cdlib.xtf.textEngine.facet.ResultFacet;
import org.cdlib.xtf.textEngine.facet.ResultGroup;
import org.cdlib.xtf.textEngine.facet.StaticGroupData;
import org.cdlib.xtf.util.CharMap;
import org.cdlib.xtf.util.Trace;
import org.cdlib.xtf.util.WordMap;

/**
 * Takes a QueryRequest, rewrites the queries if necessary to remove
 * stop-words and form bi-grams, then consults the index(es), and produces a
 * QueryResult.
 *
 * @author Martin Haye
 */
public class DefaultQueryProcessor extends QueryProcessor {
  /** Map of all XtfSearchers, so we can re-use them */
  private static HashMap searchers = new HashMap();

  /** Lucene reader from which to read index data */
  private IndexReader indexReader;

  /** Fetches spelling suggestions */
  private SpellReader spellReader;

  /** Keeps track of which chunks belong to which documents */
  private DocNumMap docNumMap;

  /** Max size of a chunk (in words) */
  @SuppressWarnings("unused")
  private int chunkSize;

  /** Number of words a chunk shares with its successor */
  private int chunkOverlap;

  /** Stop-words to remove (e.g. "the", "a", "and", etc.) */
  private Set stopSet;

  /** Mapping of plural words to singular words */
  private WordMap pluralMap;

  /** Mapping of accented chars to chars without diacritics */
  private CharMap accentMap;

  /** Whether the index is "sparse" (i.e. more than 5 chunks per doc) */
  private boolean isSparse;

  /** Names of fields that are tokenized in this index */
  private Set tokFields;

  /** Total number of documents hit (not just those that scored high) */
  private int nDocsHit;

  /** Maximum document score (used to normalize scores) */
  private float maxDocScore;

  /** Document normalization factor (calculated from {@link #maxDocScore}) */
  private float docScoreNorm;

  /** Used to warm up indexes prior to use */
  private IndexWarmer indexWarmer;

  /** Comparator used for sorting strings in "sparse" indexes */
  private static final SparseStringComparator sparseStringComparator =
      new SparseStringComparator();

  /** Comparator used for sorting strings in "compact" indexes */
  private static final FlippableStringComparator compactStringComparator =
      new FlippableStringComparator();

  /** Comparator used to sort by total number of hits */
  private static final TotalHitsComparator totalHitsComparator =
      new TotalHitsComparator();

  /** Record an index warmer to use for background warming. */
  public void setIndexWarmer(IndexWarmer warmer) {
    indexWarmer = warmer;
  }

  /**
   * This is the main entry point. Takes a pre-parsed query request and
   * handles searching the index and forming the results.<br>
   *
   * This method is synchronized because it uses two instance variables,
   * so access by multiple threads would result in incorrect counting. For
   * maximum efficiency, each thread should really use its own instance.
   *
   * @param req The pre-parsed request to process
   * @return Zero or more document hits
   */
  public synchronized QueryResult processRequest(final QueryRequest req)
      throws IOException {
    // Clear out our counters.
    nDocsHit = 0;
    maxDocScore = 0;

    // Make a vector to store the hits (we'll make it into an array
    // later, when we know how many we have.)
    //
    Vector hitVec = new Vector(10);

    if (indexWarmer == null)
      throw new IOException(
          "Fatal: must call setIndexWarmer() before DefaultQueryProcessor.processRequest()");

    // Get a reader, searcher, and document number map that will all be
    // consistent with each other and up-to-date.
    //
    XtfSearcher xtfSearcher = indexWarmer.getSearcher(req.indexPath);
    synchronized (xtfSearcher) {
      xtfSearcher.update();
      indexReader = xtfSearcher.indexReader();
      docNumMap = xtfSearcher.docNumMap();
      chunkSize = xtfSearcher.chunkSize();
      chunkOverlap = xtfSearcher.chunkOverlap();
      stopSet = xtfSearcher.stopSet();
      pluralMap = xtfSearcher.pluralMap();
      accentMap = xtfSearcher.accentMap();
      spellReader = xtfSearcher.spellReader();
      isSparse = xtfSearcher.isSparse();
      tokFields = xtfSearcher.tokenizedFields();
    }

    // Apply a work limit to the query if we were requested to. If no
    // specific limit was set, use a limiter with an infinite limit
    // (because we still need it to check periodically if the thread
    // should kill itself.)
    //
    IndexReader limReader = new XtfLimIndexReader(indexReader,
        (req.workLimit > 0) ? req.workLimit : Integer.MAX_VALUE);

    // Translate -1 maxDocs to "essentially all"
    int maxDocs = req.maxDocs;
    if (maxDocs < 0)
      maxDocs = docNumMap.getDocCount();

    // Make a queue that will accumulate the hits and pick the first
    // load of them for us. If there is a sort field specification,
    // do it in field-sorted order; otherwise, sort by score.
    //
    final PriorityQueue docHitQueue = createHitQueue(indexReader,
        req.startDoc + req.maxDocs, req.sortMetaFields, isSparse);
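    // Illustrative example of a sort spec (the field names are hypothetical,
    // not part of any particular schema): a sortMetaFields value of
    // "-publicationDate,title:emptyLast" asks for hits ordered by descending
    // publicationDate, then ascending title with empty titles last, with
    // score and document ID as the final tie-breakers. See createHitQueue()
    // below for the exact parsing rules.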
    // Start making the result by filling in its context.
    QueryResult result = new QueryResult();
    result.context = new QueryContext();
    result.context.accentMap = accentMap;
    result.context.pluralMap = pluralMap;
    result.context.stopSet = stopSet;
    result.scoresNormalized = req.normalizeScores;

    // If no query was specified, then there will be no results.
    Query query = req.query;
    if (query == null) {
      result.docHits = new DocHit[0];
      return result;
    }

    // Perform standard tokenization tasks: change words to lowercase,
    // remove apostrophes, etc.
    //
    query = new StdTermRewriter(tokFields).rewriteQuery(query);

    // If an accent map is present, remove diacritics.
    if (accentMap != null)
      query = new AccentFoldingRewriter(accentMap, tokFields).rewriteQuery(query);

    // If a plural map is present, change plural words to non-plural.
    if (pluralMap != null)
      query = new PluralFoldingRewriter(pluralMap, tokFields).rewriteQuery(query);

    // Rewrite the query for bigrams (if we have stop-words to deal with.)
    if (stopSet != null)
      query = new XtfBigramQueryRewriter(stopSet, chunkOverlap,
          tokFields).rewriteQuery(query);

    // If there's nothing left (for instance if the query was all stop-words)
    // then there will be no results.
    //
    if (query == null) {
      result.docHits = new DocHit[0];
      return result;
    }

    // Fix up all the "infinite" slop entries to be actually limited to
    // the chunk overlap size. That way, we'll get consistent results and
    // the user won't be able to tell where the chunk boundaries are.
    // Also, attach the docNumMap to every SpanDechunkingQuery.
    //
    final Query finalQuery = new SlopFixupRewriter(docNumMap, stopSet,
        pluralMap, accentMap).rewriteQuery(query);

    // If debugging is enabled, print out the final rewritten and fixed
    // up query.
    //
    if (finalQuery != req.query)
      Trace.debug("Rewritten query: " + finalQuery.toString());

    // While processing the query, we want to lazily generate DocHits,
    // and only generate a DocHit once even if it's added to multiple
    // groups.
    //
    final DocHitMakerImpl docHitMaker = new DocHitMakerImpl();

    // If we're to apply a set of additional boost sets to the documents,
    // get the set now.
    //
    final BoostSet boostSet = (req.boostSetParams == null) ? null
        : BoostSet.getCachedSet(indexReader,
                                new File(req.boostSetParams.path),
                                req.boostSetParams.field);

    // Make a Lucene searcher that will access the index according to
    // our query.
    //
    RecordingSearcher searcher = new RecordingSearcher(limReader);

    // If grouping was specified by the query, read in all the group data.
    // Note that the GroupData class holds its own cache so we don't have
    // to read data for a given field more than once.
    //
    final GroupCounts[] groupCounts = (req.facetSpecs == null) ? null
        : prepGroups(req, boostSet, searcher, finalQuery);

    // Now for the big show... go get the hits!
    searcher.search(finalQuery, null, new SpanHitCollector() {
      public void collect(int doc, float score, FieldSpanSource spanSource) {
        // Apply a boost (if there's a boost set)
        score = applyBoost(doc, score, boostSet, req);

        // Ignore deleted entries, and entries boosted down to zero.
        if (score <= 0.0f)
          return;

        // Bump the count of documents hit, and update the max score.
        nDocsHit++;
        if (score > maxDocScore)
          maxDocScore = score;

        // Record the hit.
        docHitMaker.reset(doc, score, spanSource);
        if (req.maxDocs > 0)
          docHitMaker.insertInto(docHitQueue);

        // If grouping is enabled, add this document to the counts.
        if (groupCounts != null) {
          for (int i = 0; i < groupCounts.length; i++)
            groupCounts[i].addDoc(docHitMaker);
        }
      } // collect()
    });
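    // Worked example of the boost step applied in the collector above (the
    // numbers are hypothetical; see applyBoost() below): with a boost-set
    // entry of 0.5 for a document and boostSetParams.exponent of 2.0, a raw
    // score of 0.8 becomes 0.8 * pow(0.5, 2.0) = 0.2; a boost of 0 drives
    // the score to 0, so the collector drops the document via the
    // "score <= 0.0f" check.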
    // Take the high-ranking hits and add them to the hit vector.
    // Note that they come out of the hit queue in backwards order.
    //
    int nFound = docHitQueue.size();
    DocHitImpl[] hitArray = new DocHitImpl[nFound];
    for (int i = 0; i < nFound; i++) {
      int index = nFound - i - 1;
      hitArray[index] = (DocHitImpl)docHitQueue.pop();
    }

    // Calculate the document score normalization factor.
    docScoreNorm = 1.0f;
    if (req.normalizeScores && maxDocScore > 0.0f)
      docScoreNorm = 1.0f / maxDocScore;

    // We'll need a query weight if we're being asked to explain the
    // scores.
    //
    Weight weight = null;
    if (req.explainScores)
      weight = finalQuery.weight(searcher);

    // Finish off the hits (read in the fields, normalize, make snippets).
    SnippetMaker snippetMaker = new SnippetMaker(limReader, docNumMap,
        stopSet, pluralMap, accentMap, tokFields, req.maxContext,
        req.termMode, req.returnMetaFields);
    for (int i = req.startDoc; i < nFound; i++) {
      if (req.explainScores) {
        hitArray[i].finishWithExplain(snippetMaker, docScoreNorm, weight,
            boostSet, req.boostSetParams);
      }
      else
        hitArray[i].finish(snippetMaker, docScoreNorm);
      if (result.textTerms == null)
        result.textTerms = hitArray[i].textTerms();
      hitVec.add(hitArray[i]);
    }

    // If grouping was enabled, group the hits and finish all of them.
    if (groupCounts != null) {
      result.facets = new ResultFacet[groupCounts.length];
      for (int i = 0; i < groupCounts.length; i++) {
        result.facets[i] = groupCounts[i].getResult();
        finishGroup(result.facets[i].rootGroup, snippetMaker, req, weight,
            boostSet);
      } // for if
    }

    // Done with that searcher
    searcher.close();
    searcher = null;

    assert req.maxDocs < 0 || hitVec.size() <= req.maxDocs;

    // Pack up the results into a tidy array.
    result.totalDocs = nDocsHit;
    result.startDoc = req.startDoc;
    result.endDoc = req.startDoc + hitVec.size();
    result.docHits = (DocHit[])hitVec.toArray(new DocHit[hitVec.size()]);

    // Make spelling suggestions if applicable.
    if (spellReader != null && req.spellcheckParams != null)
      spellCheck(req, result, tokFields);

    // All done.
    return result;
  } // processReq()
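  // Sketch of typical use, based on how the request fields are read above
  // (the literal values are illustrative assumptions, not a documented
  // recipe):
  //
  //   DefaultQueryProcessor proc = new DefaultQueryProcessor();
  //   proc.setIndexWarmer(warmer);        // required before processRequest()
  //   QueryRequest req = ...;             // pre-parsed query request
  //   req.indexPath = "/path/to/index";   // index to consult
  //   req.startDoc = 0;                   // paging window start
  //   req.maxDocs = 20;                   // paging window size
  //   QueryResult res = proc.processRequest(req);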
  /**
   * Checks spelling of query terms, if spelling suggestion is enabled and
   * the result falls below the cutoff thresholds.
   *
   * @param req       Original query request
   * @param res       Results of the query
   * @param tokFields Set of tokenized fields (in case no field list was
   *                  specified in the query request.)
   */
  private void spellCheck(QueryRequest req, QueryResult res, Set tokFields)
      throws IOException {
    // We can use a handy reference to the spellcheck params, and to the
    // field list.
    //
    SpellcheckParams params = req.spellcheckParams;

    // When checking the cutoffs, account for the possibility that
    // the query might be faceted, in which case we want the document
    // count of the biggest facet.
    //
    int totalDocs = res.totalDocs;
    if (res.facets != null) {
      for (int i = 0; i < res.facets.length; i++) {
        if (res.facets[i].rootGroup != null)
          totalDocs = Math.max(totalDocs, res.facets[i].rootGroup.totalDocs);
      }
    }

    // Check the cutoffs. If the documents scored well, or there were
    // a lot of them, then suggestions aren't needed.
    //
    if (params.docScoreCutoff > 0 && maxDocScore > params.docScoreCutoff)
      return;
    if (params.totalDocsCutoff > 0 && totalDocs > params.totalDocsCutoff)
      return;

    // Gather the query terms, grouped by field set.
    Set spellFieldSet = params.fields != null ? params.fields : tokFields;
    LinkedHashMap fieldsMap = gatherKeywords(req.query, spellFieldSet);

    // Make suggestions for each field set.
    LinkedHashMap out = new LinkedHashMap();
    for (Iterator fi = fieldsMap.keySet().iterator(); fi.hasNext();) {
      // Make a list of fields and terms.
      LinkedHashSet fieldsSet = (LinkedHashSet)fi.next();
      String[] fields = (String[])fieldsSet.toArray(new String[fieldsSet.size()]);
      LinkedHashSet termsSet = (LinkedHashSet)fieldsMap.get(fieldsSet);
      String[] terms = (String[])termsSet.toArray(new String[termsSet.size()]);

      // Get some suggestions
      String[] suggested = spellReader.suggestKeywords(terms);

      // If no suggestions, skip these fields.
      if (suggested == null)
        continue;
      assert suggested.length == terms.length;

      // Record each suggestion.
      for (int i = 0; i < suggested.length; i++) {
        // Skip duplicates in different field sets... retain only the
        // first one.
        //
        if (out.containsKey(terms[i]))
          continue;

        // Skip suggestions that don't change anything.
        if (terms[i].equals(suggested[i]))
          continue;

        // Okay, record it.
        SpellingSuggestion sugg = new SpellingSuggestion();
        sugg.origTerm = terms[i];
        sugg.fields = fields;
        sugg.suggestedTerm = suggested[i];
        out.put(terms[i], sugg);
      }
    } // for fi

    // If no suggestions, we're done.
    if (out.size() == 0)
      return;

    // Make sure the suggestions result in better results.
    if (!spellingImprovesResults(req, res, spellFieldSet, out))
      return;

    // Record the final suggestions in an array.
    res.suggestions = (SpellingSuggestion[])out.values().toArray(
        new SpellingSuggestion[out.values().size()]);
  } // spellCheck()

  /**
   * Re-runs the original query, except with terms replaced by their
   * suggestions. Checks that the results are improved -- at present that
   * means there are at least as many of them, and their max score is at
   * least as high.
   *
   * @param origReq       Original query request
   * @param origRes       Results of the original query
   * @param spellFieldSet Set of fields to rewrite terms within
   * @param suggs         Map of terms to their suggested replacements
   * @return              true if the suggestions improve the results.
   * @throws IOException
   */
  private boolean spellingImprovesResults(QueryRequest origReq,
                                          QueryResult origRes,
                                          Set spellFieldSet,
                                          LinkedHashMap suggs)
      throws IOException {
    // First, clone the original request, and then turn off spellcheck for
    // the clone so that we don't get in an infinite recursive loop.
    //
    QueryRequest newReq = (QueryRequest)origReq.clone();
    newReq.spellcheckParams = null;

    // Before re-querying, save the max doc score.
    float origMaxDocScore = maxDocScore;

    // Now apply the spelling suggestions to the original query.
    newReq.query = new SpellSuggRewriter(suggs, spellFieldSet).rewriteQuery(
        newReq.query);
    QueryResult newRes = this.processRequest(newReq);

    // If the new query returns nothing and the old query also returned
    // nothing, it's a semi-failure. There's no use suggesting the new
    // words even if they are better, because it won't help the user.
    //
    if (newRes.totalDocs == 0 && origRes.totalDocs == 0) {
      //System.out.print("No docs before or after: " + newReq.query.toString() + "... ");
      return false;
    }

    // If the new query returns fewer results, consider it a failure.
    if (newRes.totalDocs < origRes.totalDocs) {
      //System.out.print("Fewer docs: " + newReq.query.toString() + "... ");
      return false;
    }

    // If the max doc score is lower, that's also a failure.
    if (maxDocScore < origMaxDocScore) {
      //System.out.print("Lower score: " + newReq.query.toString() + "... ");
      return false;
    }

    // Cool! We think this is a better query.
    return true;
  }
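  // Example of the structure gatherKeywords() below produces (the field
  // names and terms are hypothetical): for a query containing
  // title:"californa" and (title:"histry" OR creator:"histry"), the
  // returned map would be
  //
  //   { {title}          -> {californa},
  //     {title, creator} -> {histry} }
  //
  // i.e. terms are grouped by the exact set of fields they occur in, and
  // spellCheck() above asks the SpellReader about each group separately.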
  /**
   * Make a list of all the terms present in the given query,
   * grouped by field set.
   *
   * @param query         The query to traverse
   * @param desiredFields The set of fields to limit to. If null, all
   *                      fields are considered.
   *
   * @return An ordered map whose keys are ordered sets of field names,
   *         and whose values are ordered sets of words.
   */
  private LinkedHashMap gatherKeywords(Query query, final Set desiredFields) {
    // Make an ordered set of words, each with an ordered list of fields
    final LinkedHashMap termMap = new LinkedHashMap();
    XtfQueryTraverser trav = new XtfQueryTraverser() {
      private void add(Term t) {
        final String field = t.field();
        final String word = t.text();
        if (desiredFields != null && !desiredFields.contains(field))
          return;
        if (!termMap.containsKey(word))
          termMap.put(word, new LinkedHashSet());
        ((LinkedHashSet)termMap.get(word)).add(field);
      }

      public void traverseQuery(Query q) {
        // Skip queries boosted to nothing
        if (q.getBoost() > 0.001f)
          super.traverseQuery(q);
      }

      protected void traverse(TermQuery q) {
        add(q.getTerm());
      }

      protected void traverse(SpanTermQuery q) {
        add(q.getTerm());
      }

      protected void traverse(SpanExactQuery q) {
        // Do not correct inside "exact" queries
      }

      protected void traverse(BooleanQuery bq) {
        BooleanClause[] clauses = bq.getClauses();
        for (int i = 0; i < clauses.length; i++) {
          if (clauses[i].getOccur() != BooleanClause.Occur.MUST_NOT)
            traverseQuery(clauses[i].getQuery());
        }
      } // traverse()

      protected void traverse(SpanChunkedNotQuery nq) {
        traverseQuery(nq.getInclude());
        // No: traverseQuery(nq.getExclude());
      } // traverse()

      protected void traverse(SpanNotQuery nq) {
        traverseQuery(nq.getInclude());
        // No: traverseQuery(nq.getExclude());
      } // traverse()

      protected void traverse(SpanNotNearQuery nq) {
        traverseQuery(nq.getInclude());
        // No: traverseQuery(nq.getExclude());
      } // traverse()
    };
    trav.traverseQuery(query);

    // Now invert: for each unique set of fields, make an ordered list
    // of the keywords.
    //
    LinkedHashMap fieldsMap = new LinkedHashMap();
    for (Iterator ti = termMap.keySet().iterator(); ti.hasNext();) {
      String word = (String)ti.next();
      LinkedHashSet fieldsSet = (LinkedHashSet)termMap.get(word);
      if (!fieldsMap.containsKey(fieldsSet))
        fieldsMap.put(fieldsSet, new LinkedHashSet());
      ((LinkedHashSet)fieldsMap.get(fieldsSet)).add(word);
    }

    // All done.
    return fieldsMap;
  } // gatherKeywords()
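  // Note on the facet field specs handled by prepGroups() below: a field
  // name normally refers to static, precomputed group data, but the special
  // form "java:<className>(<params>)" creates a dynamic group instead. For
  // example (the class name is hypothetical),
  // "java:org.example.DateBucketer(decade)" is parsed by createDynamicGroup()
  // into className "org.example.DateBucketer" and params "decade", and the
  // named class must be derived from DynamicGroupData.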
  /**
   * Create the GroupCounts objects for the given query request. Also handles
   * creating the proper hit queue for each one.
   *
   * @param req      query request containing group specs
   * @param boostSet boost set for dynamic groups
   * @param searcher searcher for dynamic groups
   * @param query    query to use to form dynamic groups
   */
  private GroupCounts[] prepGroups(final QueryRequest req,
                                   final BoostSet boostSet,
                                   RecordingSearcher searcher,
                                   Query query)
      throws IOException {
    GroupData[] groupData = new GroupData[req.facetSpecs.length];
    Vector dynamicGroupVec = new Vector();

    // First get data for each group
    for (int i = 0; i < req.facetSpecs.length; i++) {
      FacetSpec spec = req.facetSpecs[i];
      if (spec.field.startsWith("java:")) {
        groupData[i] = createDynamicGroup(indexReader, spec.field);
        dynamicGroupVec.add(groupData[i]);
      }
      else
        groupData[i] = StaticGroupData.getCachedData(indexReader, spec.field);
    }

    // If there are dynamic groups, pre-scan the query and hand them the
    // documents and scores.
    //
    if (!dynamicGroupVec.isEmpty()) {
      final DynamicGroupData[] dynGroups =
          (DynamicGroupData[])dynamicGroupVec.toArray(
              new DynamicGroupData[dynamicGroupVec.size()]);
      searcher.search(query, null, new SpanHitCollector() {
        public void collect(int doc, float score, FieldSpanSource spanSource) {
          // Apply a boost (if there's a boost set)
          score = applyBoost(doc, score, boostSet, req);

          // If document isn't deleted, collect it.
          if (score > 0.0f) {
            for (int i = 0; i < dynGroups.length; i++)
              dynGroups[i].collect(doc, score);
          }
        } // collect()
      });

      // Finish off the dynamic group data.
      for (int i = 0; i < dynGroups.length; i++)
        dynGroups[i].finish();
    } // if

    // Now make a GroupCount object around each data object.
    GroupCounts[] groupCounts = new GroupCounts[req.facetSpecs.length];
    for (int i = 0; i < req.facetSpecs.length; i++) {
      FacetSpec spec = req.facetSpecs[i];
      HitQueueMakerImpl maker = new HitQueueMakerImpl(indexReader,
          spec.sortDocsBy, isSparse);
      groupCounts[i] = new GroupCounts(groupData[i], spec, maker);
    }

    // All done.
    return groupCounts;
  } // prepGroups()

  /**
   * Create a dynamic group based on a field specification.
   *
   * @param indexReader Where to get the data from
   * @param field       Special field name starting with "java:"
   * @return            Dynamic group data
   * @throws IOException
   */
  private GroupData createDynamicGroup(IndexReader indexReader, String field)
      throws IOException {
    // Parse out the class name and parameters
    Pattern pat = Pattern.compile("java:([\\w.]+)\\((.*)\\)");
    Matcher matcher = pat.matcher(field);
    if (!matcher.matches())
      throw new RuntimeException(
          "Unrecognized dynamic facet field '" + field + "'");
    String className = matcher.group(1);
    String params = matcher.group(2);

    // Create an instance of the given class.
    DynamicGroupData dynData = null;
    try {
      Class c = Class.forName(className);
      dynData = (DynamicGroupData)c.newInstance();
    }
    catch (ClassNotFoundException e) {
      throw new RuntimeException(
          "Dynamic facet class '" + className + "' not found");
    }
    catch (InstantiationException e) {
      throw new RuntimeException(
          "Cannot instantiate dynamic facet class '" + className + "'", e);
    }
    catch (IllegalAccessException e) {
      throw new RuntimeException(
          "Cannot instantiate dynamic facet class '" + className + "'", e);
    }
    catch (ClassCastException e) {
      throw new RuntimeException(
          "Class '" + className + "' must be derived from DynamicGroupData");
    }

    // Initialize the new instance, and we're done.
    dynData.init(indexReader, tokFields, params);
    return dynData;
  } // createDynamicGroup()

  /**
   * Finishes DocHits within a single group (also processes all its
   * descendant groups.)
   *
   * @param group        Group to finish
   * @param snippetMaker Used to make snippets for any DocHits inside the
   *                     group.
   * @param req          Determines whether to finish with 'explain' or not
   * @param weight       Used for score explanations
   * @param boostSet     Used for score explanations
   */
  private void finishGroup(ResultGroup group, SnippetMaker snippetMaker,
                           QueryRequest req, Weight weight, BoostSet boostSet)
      throws IOException {
    // Finish DocHits for this group
    if (group.docHits != null) {
      for (int k = 0; k < group.docHits.length; k++) {
        DocHitImpl hit = (DocHitImpl)group.docHits[k];
        if (req.explainScores) {
          hit.finishWithExplain(snippetMaker, docScoreNorm, weight, boostSet,
              req.boostSetParams);
        }
        else
          hit.finish(snippetMaker, docScoreNorm);
      } // for k
    }

    // Now finish all the descendants.
    if (group.subGroups != null) {
      for (int j = 0; j < group.subGroups.length; j++)
        finishGroup(group.subGroups[j], snippetMaker, req, weight, boostSet);
    }
  } // finishGroup()

  /**
   * QueryProcessor maintains a static cache of Lucene searchers, one for
   * each index directory. If data is changed, normally it's not recognized
   * until a periodic (every 30 seconds) check.
   *
   * Calling this method forces new changes to an index to be immediately
   * recognized.
   */
  public void resetCache() {
    searchers.clear();
  } // resetCache()

  /**
   * If a boost set was specified, boost the given document's score according
   * to the set.
   */
  private float applyBoost(int doc, float score, BoostSet boostSet,
                           QueryRequest req) {
    // If we're boosting, apply that factor.
    if (score > 0 && boostSet != null) {
      float boost = boostSet.getBoost(doc, req.boostSetParams.defaultBoost);
      if (req.boostSetParams.exponent != 1.0f)
        boost = (float)Math.pow(boost, req.boostSetParams.exponent);
      score *= boost;
    }
    return score;
  }

  /**
   * Creates either a standard score-sorting hit queue, or a field-sorting
   * hit queue, depending on whether the query is to be sorted.
   *
   * @param reader     will be used to read the field contents
   * @param inSize     size of the queue (typically startDoc + maxDocs). If
   *                   this number is >= 999999, an infinitely resizing
   *                   queue will be created.
   * @param sortFields space or comma delimited list of fields to sort by
   * @param isSparse   if index is sparse (i.e. more than 5 chunks per doc)
   * @return           an appropriate hit queue
   */
  private static PriorityQueue createHitQueue(IndexReader reader, int inSize,
                                              String sortFields,
                                              boolean isSparse)
      throws IOException {
    // If a large size is requested, start with a small queue and expand
    // later, if necessary.
    //
    int size = (inSize >= 999999) ? 1 : inSize;

    // If no sort fields, do a simple score sort.
    PriorityQueue ret;
    if (sortFields == null)
      ret = new HitQueue(size);
    else {
      // Parse out the list of fields to sort by.
      Vector fieldNames = new Vector();
      StringTokenizer st = new StringTokenizer(sortFields, " \t\r\n,;");
      while (st.hasMoreTokens())
        fieldNames.add(st.nextToken());

      // If there were none, do a simple score sort.
      if (fieldNames.size() == 0)
        ret = new HitQueue(size);
      else {
        // Okay, make a SortField out of each one, in priority order from
        // highest to lowest. After all the fields, an implicit score sorter
        // is added so that documents which match in all other respects
        // will come out ordered by score.
        //
        // Each name can be optionally prefixed with "-" to sort in reverse,
        // or "+" to sort in normal order (but "+" is unnecessary, since
        // normal order is the default.)
        //
        // There's also a more verbose and powerful way to affect sort order:
        // modifiers. Possible modifiers are ":ascending", ":descending",
        // ":emptyFirst", and ":emptyLast".
        //
        SortField[] fields = new SortField[fieldNames.size() + 2];
        for (int i = 0; i < fieldNames.size(); i++) {
          String name = (String)fieldNames.elementAt(i);
          boolean ascending = false;
          boolean descending = false;
          boolean emptyFirst = false;
          boolean emptyLast = false;

          // Check for the short-hand "-" and "+" prefixes
          if (name.startsWith("-")) {
            descending = true;
            name = name.substring(1);
          }
          else if (name.startsWith("+")) {
            ascending = true;
            name = name.substring(1);
          }

          // Check for more verbose ":" modifiers after the field name
          String[] parts = name.split(":");
          name = parts[0];
          for (int j = 1; j < parts.length; j++) {
            if (parts[j].equalsIgnoreCase("ascending"))
              ascending = true;
            else if (parts[j].equalsIgnoreCase("descending"))
              descending = true;
            else if (parts[j].equalsIgnoreCase("emptyFirst"))
              emptyFirst = true;
            else if (parts[j].equalsIgnoreCase("emptyLast"))
              emptyLast = true;
            else
              throw new IOException(
                  "Unknown sort modifier: '" + parts[j] + "'");
          }

          // Check for conflicting modifiers.
          if ((ascending && descending) || (emptyFirst && emptyLast))
            throw new IOException("Conflicting sort modifiers");

          // Interpret the modifiers.
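          // For example (hypothetical field name), a spec of
          // "-title:emptyFirst" arrives here with descending=true and
          // emptyFirst=true, and is interpreted below as reverse=true,
          // flipEmpty=false, so the SortField is built on "title" rather
          // than "title:flipEmpty".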
          boolean reverse;
          if (ascending)
            reverse = false;
          else if (descending)
            reverse = true;
          else
            reverse = false; // default

          boolean flipEmpty;
          if (!reverse) {
            if (emptyFirst)
              flipEmpty = true;
            else if (emptyLast)
              flipEmpty = false;
            else
              flipEmpty = false; // default
          }
          else {
            if (emptyFirst)
              flipEmpty = false;
            else if (emptyLast)
              flipEmpty = true;
            else
              flipEmpty = true; // default
          }

          String finalName = flipEmpty ? (name + ":flipEmpty") : name;

          // Though not strictly necessary, allow the user to specify "score"
          // or "relevance" to sort by those. That way, automated programs can
          // always give a "sortDocsBy" field.
          //
          if (name.equals("score") || name.equals("relevance")) {
            if (reverse || flipEmpty)
              throw new RuntimeException(
                  "Illegal modifier on sortDocsBy 'score'");
            fields[i] = SortField.FIELD_SCORE;
          }
          else if (name.equals("totalHits"))
            fields[i] = new SortField(finalName, totalHitsComparator, reverse);
          else if (isSparse)
            fields[i] = new SortField(finalName, sparseStringComparator, reverse);
          else
            fields[i] = new SortField(finalName, compactStringComparator, reverse);
        }

        // Default tie-breakers: first, score. If score is equal, sort by doc ID.
        fields[fieldNames.size()] = SortField.FIELD_SCORE;
        fields[fieldNames.size() + 1] = SortField.FIELD_DOC;

        // And make the final hit queue.
        ret = new FieldSortedHitQueue(reader, fields, size);
      }
    }

    // If a ton of hits is requested, make the queue into a resizing one.
    if (inSize >= 999999)
      ret.setExpandable();

    // All done.
    return ret;
  } // createHitQueue()

  private static class DocHitMakerImpl implements GroupCounts.DocHitMaker {
    private int doc;
    private float score;
    private FieldSpanSource spanSrc;
    private DocHitImpl docHit;

    public final void reset(int doc, float score, FieldSpanSource spanSrc) {
      this.doc = doc;
      this.score = score;
      this.spanSrc = spanSrc;
      docHit = null;
    }

    public final int getDocNum() {
      return doc;
    }

    public final float getScore() {
      return score;
    }

    public final boolean insertInto(PriorityQueue queue) {
      if (docHit == null)
        docHit = new DocHitImpl(doc, score);

      try {
        docHit.setSpanSource(spanSrc);
        boolean inserted = queue.insert(docHit);

        // If we're keeping this hit, make sure spans have been grabbed.
        if (inserted)
          docHit.totalSnippets();
        return inserted;
      }
      finally {
        docHit.setSpanSource(null); // prevent memory leaks
      }
    }
  } // class DocHitMakerImpl

  private static class HitQueueMakerImpl implements GroupCounts.HitQueueMaker {
    private IndexReader reader;
    private String sortFields;
    private boolean isSparse;

    public HitQueueMakerImpl(IndexReader reader, String sortFields,
                             boolean isSparse) {
      this.reader = reader;
      this.sortFields = sortFields;
      this.isSparse = isSparse;
    }

    public PriorityQueue makeQueue(int size) {
      try {
        return DefaultQueryProcessor.createHitQueue(reader, size, sortFields,
            isSparse);
      }
      catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  } // class HitQueueMakerImpl
} // class DefaultQueryProcessor