/*
 *  RankingQueryRunnerImpl.java
 *
 *  Copyright (c) 1995-2010, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Valentin Tablan, 16 Nov 2011
 *
 *  $Id$
 */
package gate.mimir.search;

import gate.mimir.index.IndexException;
import gate.mimir.search.query.Binding;
import gate.mimir.search.query.QueryExecutor;
import gate.mimir.search.query.QueryNode;
import gate.mimir.search.score.MimirScorer;
import it.unimi.dsi.fastutil.doubles.DoubleBigArrayBigList;
import it.unimi.dsi.fastutil.longs.LongBigArrayBigList;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2ObjectAVLTreeMap;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectBigArrayBigList;
import it.unimi.dsi.fastutil.objects.ObjectBigList;
import it.unimi.dsi.fastutil.objects.ObjectList;

import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;

import org.apache.log4j.Logger;

/**
 * A QueryRunner implementation that can perform ranking.
 * This query runner has two modes of functioning: ranking and non-ranking,
 * depending on whether a {@link MimirScorer} is provided during construction
 * or not.
 * All documents are referred to using their rank (i.e. position in the list of
 * results). When working in non-ranking mode, ranking order is the same as
 * document ID order.
 */
public class RankingQueryRunnerImpl implements QueryRunner {

  /**
   * Constant used as a flag to mark the end of a list of tasks.
   */
  private static final Runnable NO_MORE_TASKS = new Runnable(){
    public void run() {}
  };

  /**
   * The background thread implementation: simply collects {@link Runnable}s
   * from the {@link RankingQueryRunnerImpl#backgroundTasks} queue and runs
   * them.
   */
  protected class BackgroundRunner implements Runnable {
    @Override
    public void run() {
      try {
        while(!closed) {
          Runnable job = backgroundTasks.take();
          if(job == NO_MORE_TASKS) break;
          else job.run();
        }
      } catch(InterruptedException e) {
        Thread.currentThread().interrupt();
        e.printStackTrace();
      }
    }
  }

  /**
   * Collects the document hits (i.e. {@link Binding}s) for the documents
   * between the two provided ranks (indexes in the {@link #documentsOrder}
   * list). If ranking is not being performed ({@link #documentsOrder} is
   * <code>null</code>), then the indexes are used against the
   * {@link #documentIds} list.
   *
   * This is the only actor that writes to the {@link #documentHits} list.
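   * <p>
   * Illustrative sketch (not part of the original documentation) of the
   * indirection this worker relies on; the local variable names are only for
   * the example:
   * <pre>{@code
   * // in ranking mode a rank is first mapped to an index in documentIds
   * long docIndex = documentsOrder.getLong(rank);
   * // that index then yields the actual document ID and the aligned hits slot
   * long docId = documentIds.getLong(docIndex);
   * List<Binding> hitsForDoc = documentHits.get(docIndex);
   * // in non-ranking mode the rank itself is used as the index
   * }</pre>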
   */
  protected class HitsCollector implements Runnable {

    /**
     * The starting rank
     */
    long start;

    /**
     * The ending rank
     */
    long end;

    public HitsCollector(long rangeStart, long rangeEnd) {
      this.start = rangeStart;
      this.end = rangeEnd;
    }

    @Override
    public void run() {
      long[] documentIndexes = null;
      if(ranking) {
        // we're ranking -> first calculate the range of documents in ID order
        documentIndexes = new long[(int)(end - start)];
        for(long i = start; i < end; i++) {
          documentIndexes[(int)(i - start)] = documentsOrder.getLong(i);
        }
        Arrays.sort(documentIndexes);
      }
      try {
        // see if we can get at the first document
        long docIndex = (documentIndexes != null ? documentIndexes[0] : start);
        long docId = documentIds.getLong(docIndex);
        if(queryExecutor.getLatestDocument() < 0 ||
           queryExecutor.getLatestDocument() >= docId) {
          // we need to 'scroll back' the executor: get a new executor
          QueryExecutor oldExecutor = queryExecutor;
          queryExecutor = queryExecutor.getQueryNode().getQueryExecutor(
            queryEngine);
          oldExecutor.close();
        }
        for(long i = start; i < end; i++) {
          docIndex = (documentIndexes != null ?
            documentIndexes[(int)(i - start)] : i);
          docId = documentIds.getLong(docIndex);
          // don't need to check for deletion here as we know for sure that
          // this doc ID is ok. The only exception would be if it was deleted
          // since this query was originally issued, but I think we can live
          // with that
          long newDoc = queryExecutor.nextDocument(docId - 1);
          // sanity check
          if(newDoc == docId) {
            List<Binding> hits = new ObjectArrayList<Binding>();
            Binding aHit = queryExecutor.nextHit();
            while(aHit != null) {
              hits.add(aHit);
              aHit = queryExecutor.nextHit();
            }
            documentHits.set(docIndex, hits);
          } else {
            // this could happen if we've been closed in the mean time
            if(closed) return;
            // we got the wrong document ID
            logger.error("Unexpected document ID returned by executor " +
              "(got " + newDoc + " while expecting " + docId + ")!");
          }
        }
      } catch(IOException e) {
        // this could happen if we've been closed in the mean time
        if(closed) return;
        // otherwise, it's an error
        logger.error("Exception while restarting the query executor.", e);
        try {
          close();
        } catch(IOException e1) {
          logger.error("Exception while closing the query runner.", e1);
        }
      }
    }
  }

  /**
   * The first action started when a new {@link RankingQueryRunnerImpl} is
   * created.
   * It performs the following actions:
   * <ul>
   *   <li>collects all document IDs in
   *   {@link RankingQueryRunnerImpl#documentIds}</li>
   *   <li>if ranking is enabled
   *     <ul>
   *       <li>it collects all document scores</li>
   *     </ul>
   *   </li>
   *   <li>if ranking is not enabled
   *     <ul>
   *       <li>it collects the document hits for the first block of
   *       documents</li>
   *     </ul>
   *   </li>
   *   <li>if ranking is enabled, after all document IDs are obtained, it
   *   starts the work for ranking the first block of documents (which, upon
   *   completion, will also start a background job to collect all the hits
   *   for that block).</li>
   * </ul>
   */
  protected class DocIdsCollector implements Runnable {
    @Override
    public void run() {
      try{
        // collect all documents and their scores
        if(ranking) scorer.wrap(queryExecutor);
        long docId = nextNotDeleted();
        while(docId >= 0) {
          // enlarge the hits list
          if(ranking){
            documentScores.add(scorer.score());
            documentHits.add(null);
          } else {
            // not scoring: also collect the hits for the first block of
            // documents
            if(docId < docBlockSize) {
              ObjectList<Binding> hits = new ObjectArrayList<Binding>();
              Binding hit = queryExecutor.nextHit();
              while(hit != null) {
                hits.add(hit);
                hit = queryExecutor.nextHit();
              }
              documentHits.add(hits);
            } else {
              documentHits.add(null);
            }
          }
          // and store the new doc ID
          documentIds.add(docId);
          docId = nextNotDeleted();
        }
        allDocIdsCollected = true;
        if(ranking) {
          // now rank the first batch of documents
          // this will also start a second background job to collect the hits
          rankDocuments(docBlockSize - 1);
        }
      } catch (Exception e) {
        // this could happen if we've been closed in the mean time
        if(closed) return;
        // otherwise, it's an error
        logger.error("Exception while collecting document IDs", e);
        try {
          close();
        } catch(IOException e1) {
          logger.error("Exception while closing, after exception.", e1);
        }
      }
    }
  }

  /**
   * Shared logger instance.
   */
  protected static Logger logger =
    Logger.getLogger(RankingQueryRunnerImpl.class);

  /**
   * The {@link QueryExecutor} for the query being run.
   */
  protected QueryExecutor queryExecutor;

  /**
   * The {@link QueryEngine} we run inside.
   */
  protected QueryEngine queryEngine;

  /**
   * The {@link MimirScorer} to be used for ranking documents.
   */
  protected MimirScorer scorer;

  /**
   * Flag set to <code>true</code> when ranking is being performed, or
   * <code>false</code> otherwise.
   */
  final boolean ranking;

  /**
   * The number of documents to be ranked (or have their hits collected) as a
   * block.
   */
  protected int docBlockSize;

  /**
   * The document IDs for the documents found to contain hits. This list is
   * sorted in ascending document ID order.
   */
  protected LongBigList documentIds;

  /**
   * If scoring is enabled ({@link #scorer} is not <code>null</code>), this
   * list contains the scores for the documents found to contain hits. This
   * list is aligned to {@link #documentIds}.
   */
  protected DoubleBigArrayBigList documentScores;

  /**
   * The sets of hits for each returned document. This data structure is
   * lazily built, so some elements may be null. This list is aligned to
   * {@link #documentIds}.
   */
  protected ObjectBigList<List<Binding>> documentHits;

  /**
   * The order the documents should be returned in (elements in this list are
   * indexes in {@link #documentIds}).
   */
  protected LongBigList documentsOrder;

  /**
   * Data structure holding references to {@link Future}s that are currently
   * working (or have worked) on collecting hits for a range of document
   * indexes.
   */
  protected SortedMap<long[], Future<?>> hitCollectors;

  /**
   * The background thread used for collecting hits.
   */
  protected Thread runningThread;

  /**
   * A queue with tasks to be executed by the background thread.
   */
  protected BlockingQueue<Runnable> backgroundTasks;

  /**
   * Flag used to mark that all result documents have been counted.
   */
  protected volatile boolean allDocIdsCollected = false;

  /**
   * The task that's working on collecting all the document IDs. When this
   * activity has finished, the precise document count is known.
   */
  protected volatile FutureTask<Object> docIdCollectorFuture;

  /**
   * Internal flag used to mark when this query runner has been closed.
   */
  protected volatile boolean closed;

  /**
   * Creates a query runner (in ranking mode if a scorer is provided).
   * @param executor the {@link QueryExecutor} for the query being executed.
   * @param scorer the {@link MimirScorer} to use for ranking, or
   * <code>null</code> if no ranking is required.
   * @throws IOException
   */
  public RankingQueryRunnerImpl(QueryExecutor executor, MimirScorer scorer)
    throws IOException {
    this.queryExecutor = executor;
    this.scorer = scorer;
    this.closed = false;
    ranking = scorer != null;
    queryEngine = queryExecutor.getQueryEngine();
    docBlockSize = queryEngine.getDocumentBlockSize();
    documentIds = new LongBigArrayBigList();
    documentHits = new ObjectBigArrayBigList<List<Binding>>();
    if(scorer != null) {
      documentScores = new DoubleBigArrayBigList();
      documentsOrder = new LongBigArrayBigList(docBlockSize);
    }
    hitCollectors = new Object2ObjectAVLTreeMap<long[], Future<?>>(
      new Comparator<long[]>(){
        @Override
        public int compare(long[] o1, long[] o2) {
          long res = o1[0] - o2[0];
          return res > 0 ? 1 : (res == 0 ? 0 : -1);
        }
      });
    // start the background thread
    backgroundTasks = new LinkedBlockingQueue<Runnable>();
    Runnable backgroundRunner = new BackgroundRunner();
    // get a thread from the executor, if one exists
    if(queryEngine.getExecutor() != null){
      try {
        queryEngine.getExecutor().execute(backgroundRunner);
      } catch(RejectedExecutionException e) {
        logger.warn("Could not allocate a new background thread", e);
        throw new RejectedExecutionException(
          "System overloaded, please try again later.");
      }
    } else {
      Thread theThread = new Thread(backgroundRunner, getClass().getName());
      theThread.setDaemon(true);
      theThread.start();
    }
    // queue a job for collecting all document ids
    try {
      docIdCollectorFuture = new FutureTask<Object>(new DocIdsCollector(), null);
      backgroundTasks.put(docIdCollectorFuture);
      if(!ranking) {
        // if not ranking, the doc IDs collector will also collect the
        // hits for the first docBlockSize number of documents
        synchronized(hitCollectors) {
          hitCollectors.put(new long[]{0, docBlockSize}, docIdCollectorFuture);
        }
      }
    } catch(InterruptedException e) {
      Thread.currentThread().interrupt();
      logger.error("Could not queue a background task.", e);
    }
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getDocumentsCount()
   */
  @Override
  public long getDocumentsCount() {
    if(allDocIdsCollected) return documentIds.size64();
    else return -1;
  }

  /**
   * Synchronous version of {@link #getDocumentsCount()} that waits if
   * necessary before returning the correct result (instead of returning
   * <code>-1</code> if the value is not yet known).
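   * <p>
   * A minimal usage sketch (illustrative only; it assumes <code>runner</code>
   * is an already constructed {@link RankingQueryRunnerImpl}):
   * <pre>{@code
   * long total = runner.getDocumentsCountSync(); // blocks until all IDs are collected
   * for(long rank = 0; rank < total; rank++) {
   *   long docId = runner.getDocumentID(rank);
   * }
   * }</pre>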
   * @return the total number of documents found to match the query.
   */
  @Override
  public long getDocumentsCountSync() {
    try {
      docIdCollectorFuture.get();
    } catch(Exception e) {
      logger.error("Exception while getting all document IDs", e);
      throw new IllegalStateException(
        "Exception while getting all document IDs", e);
    }
    return getDocumentsCount();
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getCurrentDocumentsCount()
   */
  @Override
  public long getDocumentsCurrentCount() {
    return documentIds.size64();
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getDocumentID(int)
   */
  @Override
  public long getDocumentID(long rank) throws IndexOutOfBoundsException,
    IOException {
    return documentIds.getLong(getDocumentIndex(rank));
  }

  @Override
  public double getDocumentScore(long rank) throws IndexOutOfBoundsException,
    IOException {
    return (documentScores != null) ?
      documentScores.getDouble(getDocumentIndex(rank)) : DEFAULT_SCORE;
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getDocumentHits(int)
   */
  @Override
  public List<Binding> getDocumentHits(long rank)
    throws IndexOutOfBoundsException, IOException {
    long documentIndex = getDocumentIndex(rank);
    List<Binding> hits = documentHits.get(documentIndex);
    if(hits == null) {
      // hits not collected yet
      try {
        // find the Future working on it, or start a new one,
        // then wait for it to complete
        collectHits(new long[]{rank, rank + 1}).get();
        hits = documentHits.get(documentIndex);
      } catch(Exception e) {
        logger.error("Exception while waiting for hits collection", e);
        throw new RuntimeException(
          "Exception while waiting for hits collection", e);
      }
    }
    return hits;
  }

  /**
   * Given a document rank, return its index in the {@link #documentIds} list.
   * If ranking is not being performed, then the rank is interpreted as an
   * index against the {@link #documentIds} list and is simply returned.
   * @param rank the document rank.
   * @return the index of the document in the {@link #documentIds} list.
   * @throws IOException
   * @throws IndexOutOfBoundsException
   */
  protected long getDocumentIndex(long rank) throws IOException,
    IndexOutOfBoundsException {
    long maxRank = documentIds.size64();
    if(rank >= maxRank) throw new IndexOutOfBoundsException(
      "Document rank too large (" + rank + " >= " + maxRank + ").");
    if(documentsOrder != null) {
      // we're in ranking mode
      if(rank >= documentsOrder.size64()) {
        // document exists, but has not been ranked yet
        rankDocuments(rank);
      }
      return documentsOrder.getLong(rank);
    } else {
      return rank;
    }
  }

  /**
   * Ranks some more documents (i.e. adds more entries to the
   * {@link #documentsOrder} list), making sure that the document at the
   * provided rank is included (if such a document exists). If the provided
   * rank is larger than the number of result documents, then all documents
   * will be ranked before this method returns.
   * This is the only method that writes to the {@link #documentsOrder} list.
   * This method is executed synchronously in the client thread.
   *
   * @param rank the rank of a document that must be included in the ranked
   * set.
   * @throws IOException
   */
  protected void rankDocuments(long rank) throws IOException {
    if(rank < documentsOrder.size64()) return;
    synchronized(documentsOrder) {
      // rank some documents
      long rankRangeStart = documentsOrder.size64();
      // right boundary is exclusive
      long rankRangeEnd = rank + 1;
      if((rankRangeEnd - rankRangeStart) < (docBlockSize)) {
        // extend the size of the chunk of documents to be ranked
        rankRangeEnd = rankRangeStart + docBlockSize;
      }
      // the ID of the document with the minimum score already ranked
      long smallestOldScoreDocId = rankRangeStart > 0 ?
        documentIds.getLong(documentsOrder.getLong(rankRangeStart - 1)) : -1;
      // the score for the document above, which is the upper limit for new
      // scores
      double smallestOldScore = rankRangeStart > 0 ?
        documentScores.getDouble(documentsOrder.getLong(rankRangeStart - 1)) :
        Double.POSITIVE_INFINITY;
      // now collect some more documents
      for(long i = 0; i < documentIds.size64(); i++) {
        long documentId = documentIds.getLong(i);
        double documentScore = documentScores.getDouble(i);
        // the index for the document with the smallest score,
        // from the new ones being ranked
        long smallestDocIndex = rankRangeStart < documentsOrder.size64() ?
          documentsOrder.getLong(rankRangeStart) : -1;
        // the smallest score that's been seen in this new round
        double smallestNewScore = smallestDocIndex == -1 ?
          Double.NEGATIVE_INFINITY :
          documentScores.getDouble(smallestDocIndex);
        // we care about this new document if:
        // - we haven't collected enough documents yet, or
        // - it has a better score than the smallest score so far, but a
        //   smaller score than the maximum permitted score (i.e. it has not
        //   already been ranked), or
        // - it's a new document (i.e. with an ID strictly larger) with the
        //   same score as the largest permitted score
        if(documentsOrder.size64() < rankRangeEnd ||
           (documentScore > smallestNewScore &&
            documentScore < smallestOldScore) ||
           (documentScore == smallestOldScore &&
            documentId > smallestOldScoreDocId)) {
          // find the rank for the new doc in the documentsOrder list, and
          // insert it
          documentsOrder.add(findRank(documentScore, rankRangeStart,
            documentsOrder.size64()), i);
          // if we have too many documents, drop the lowest scoring one
          if(documentsOrder.size64() > rankRangeEnd) {
            documentsOrder.removeLong(documentsOrder.size64() - 1);
          }
        }
      }
      // start collecting the hits for the newly ranked documents (in a new
      // thread)
      if(documentsOrder.size64() > rankRangeStart){
        collectHits(new long[] {rankRangeStart, documentsOrder.size64()});
      }
    }
  }

  /**
   * Given a document score, finds the correct insertion point into the
   * {@link #documentsOrder} list, within a given range of ranks.
   * This method performs binary search followed by a linear scan so that the
   * returned insertion point is the largest correct one (i.e. later documents
   * with the same score get sorted after earlier ones, thus keeping the
   * sorting stable).
   *
   * @param documentScore the score for the new document.
   * @param start the start of the search range within {@link #documentsOrder}
   * @param end the end of the search range within {@link #documentsOrder}
   * @return the largest correct insertion point
   */
  protected long findRank(double documentScore, long start, long end) {
    // standard binary search
    double midVal;
    end--;
    while (start <= end) {
      long mid = (start + end) >>> 1;
      midVal = documentScores.getDouble(documentsOrder.getLong(mid));
      // note that the documentsOrder list is in decreasing score order!
      if (midVal > documentScore) start = mid + 1;
      else if (midVal < documentScore) end = mid - 1;
      else {
        // we found a doc with exactly the same score: scan to the right
        while(mid < documentsOrder.size64() &&
              documentScores.getDouble(documentsOrder.getLong(mid)) ==
              documentScore){
          mid++;
        }
        return mid;
      }
    }
    return start;
  }

  /**
   * Makes sure all the documents in the specified range are queued for hit
   * collection.
   * @param interval the interval specified by 2 document ranks. The interval
   * is defined as the elements in {@link #documentsOrder} between ranks
   * interval[0] and (interval[1]-1) inclusive.
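   * <p>
   * For example (an illustrative call, mirroring what
   * {@link #getDocumentHits(long)} does), requesting hits for a single rank
   * queues (or reuses) a collector whose range may be expanded to a full
   * block:
   * <pre>{@code
   * Future<?> work = collectHits(new long[]{rank, rank + 1});
   * work.get(); // wait for the hits of that document (and its block)
   * }</pre>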
   * @return the future that has been queued for collecting the hits.
   */
  protected Future<?> collectHits(long[] interval) {
    // expand the interval to block size (or size of documentsOrder)
    if(interval[1] - interval[0] < docBlockSize) {
      final long expansion = docBlockSize - (interval[1] - interval[0]);
      // expand up to (expansion / 2) to the left
      interval[0] = Math.max(0, interval[0] - (expansion / 2));
      // expand to the right
      long upperBound = documentsOrder != null ?
        documentsOrder.size64() : documentIds.size64();
      interval[1] = Math.min(upperBound, interval[0] + docBlockSize);
    }
    HitsCollector hitsCollector = null;
    synchronized(hitCollectors) {
      SortedMap<long[], Future<?>> headMap = hitCollectors.headMap(interval);
      long[] previousInterval = headMap.isEmpty() ?
        new long[]{0, 0} : headMap.lastKey();
      if(previousInterval[1] >= interval[1]) {
        // we're part of previous interval
        return hitCollectors.get(previousInterval);
      } else {
        // calculate an appropriate interval to collect hits for
        SortedMap<long[], Future<?>> tailMap = hitCollectors.tailMap(
          new long[]{interval[1], interval[1]});
        long[] followingInterval = tailMap.isEmpty() ?
          new long[]{interval[1], interval[1]} : tailMap.firstKey();
        long start = Math.max(previousInterval[1] - 1, interval[0]);
        long end = Math.min(followingInterval[0], interval[1]);
        hitsCollector = new HitsCollector(start, end);
        FutureTask<?> future = new FutureTask<Object>(hitsCollector, null);
        hitCollectors.put(new long[]{start, end}, future);
        try {
          backgroundTasks.put(future);
        } catch(InterruptedException e) {
          logger.error("Error while queuing background work", e);
          throw new RuntimeException("Error while queuing background work", e);
        }
        return future;
      }
    }
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getDocumentText(int, int, int)
   */
  @Override
  public String[][] getDocumentText(long rank, int termPosition, int length)
    throws IndexException, IndexOutOfBoundsException, IOException {
    return queryEngine.getText(getDocumentID(rank), termPosition, length);
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getDocumentURI(int)
   */
  @Override
  public String getDocumentURI(long rank) throws IndexException,
    IndexOutOfBoundsException, IOException {
    return queryEngine.getDocumentURI(getDocumentID(rank));
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getDocumentTitle(int)
   */
  @Override
  public String getDocumentTitle(long rank) throws IndexException,
    IndexOutOfBoundsException, IOException {
    return queryEngine.getDocumentTitle(getDocumentID(rank));
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getDocumentMetadataField(int, java.lang.String)
   */
  @Override
  public Serializable getDocumentMetadataField(long rank, String fieldName)
    throws IndexException, IndexOutOfBoundsException, IOException {
    return queryEngine.getDocumentMetadataField(getDocumentID(rank), fieldName);
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#getDocumentMetadataFields(int, java.util.Set)
   */
  @Override
  public Map<String, Serializable> getDocumentMetadataFields(long rank,
    Set<String> fieldNames) throws IndexException, IndexOutOfBoundsException,
    IOException {
    Map<String, Serializable> res = new HashMap<String, Serializable>();
    for(String fieldName : fieldNames) {
      Serializable value = getDocumentMetadataField(rank, fieldName);
      if(value != null) res.put(fieldName, value);
    }
    return res;
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#renderDocument(int, java.lang.Appendable)
   */
  @Override
  public void renderDocument(long rank, Appendable out) throws IOException,
    IndexException {
    queryEngine.renderDocument(getDocumentID(rank), getDocumentHits(rank),
      out);
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.QueryRunner#close()
   */
  @Override
  public void close() throws IOException {
    this.closed = true;
    try{
      if(queryEngine != null) queryEngine.releaseQueryRunner(this);
      if(queryExecutor != null) queryExecutor.close();
      scorer = null;
    } finally {
      try {
        // stop the background tasks runnable,
        // which will return the thread to the pool
        backgroundTasks.put(NO_MORE_TASKS);
      } catch(InterruptedException e) {
        // ignore
      }
    }
  }

  /**
   * Find the next document ID for the current query executor which is not
   * marked as deleted in the index.
   */
  protected long nextNotDeleted() throws IOException {
    long docId = ranking ?
      scorer.nextDocument(-1) : queryExecutor.nextDocument(-1);
    while(docId >= 0 && queryEngine.getIndex().isDeleted(docId)) {
      docId = ranking ?
        scorer.nextDocument(-1) : queryExecutor.nextDocument(-1);
    }
    return docId;
  }
}
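
// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original class): a minimal
// example of how a RankingQueryRunnerImpl might be driven. It assumes a
// QueryEngine and a parsed QueryNode are obtained elsewhere; the class and
// method names below are hypothetical and exist only for illustration.
// ---------------------------------------------------------------------------
final class RankingQueryRunnerUsageSketch {

  private RankingQueryRunnerUsageSketch() {}

  /**
   * Runs a query in non-ranking mode (a <code>null</code> scorer) and prints
   * the first few result documents together with their hit counts.
   */
  static void printFirstResults(QueryEngine engine, QueryNode query)
    throws IOException {
    // obtain an executor for the query and wrap it in a runner;
    // passing a MimirScorer instead of null would enable ranking mode
    QueryExecutor executor = query.getQueryExecutor(engine);
    RankingQueryRunnerImpl runner = new RankingQueryRunnerImpl(executor, null);
    try {
      // block until the full result set has been enumerated
      long total = runner.getDocumentsCountSync();
      for(long rank = 0; rank < total && rank < 10; rank++) {
        long docId = runner.getDocumentID(rank);
        List<Binding> hits = runner.getDocumentHits(rank);
        System.out.println("Document " + docId + ": " + hits.size() + " hit(s)");
      }
    } finally {
      runner.close();
    }
  }
}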