/*
* QueryRunner.java
*
* Copyright (c) 2007-2011, The University of Sheffield.
*
* This file is part of GATE MÃmir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* Valentin Tablan, 22 Nov 2011
*
* $Id$
*/
package gate.mimir.search;
import gate.mimir.DocumentMetadataHelper;
import gate.mimir.index.IndexException;
import gate.mimir.search.query.Binding;
import gate.mimir.search.query.QueryExecutor;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* A QueryRunner is used to manage the execution of the query (supplied as a
* {@link QueryExecutor}). Implementations may use a background thread to
* pre-fetch data which they then make available through the public API.
*
* All references to documents are made by rank, i.e. the position of the
* document in the list of results.
*
* Unless there is a good reason not to (e.g. results ranking), the documents
* will be returned in increasing documentID order.
*
* QueryRunners that perform ranking will re-order the result list so that
* documents are returned in decreasing score order.
*/
public interface QueryRunner {
/**
* The default score given to all documents when actual scoring is not being
* performed.
*/
public final double DEFAULT_SCORE = 1.0;
/**
* Gets the number of result documents.
* @return <code>-1</code> if the search has not yet completed, the total
* number of result document otherwise.
*/
public long getDocumentsCount();
/**
* Synchronous version of {@link #getDocumentsCount()} that waits if necessary
* before returning the correct result (instead of returning <code>-1</code>
* of the value is not yet known).
* @return the total number of documents found to match the query.
*/
public long getDocumentsCountSync();
/**
* Gets the number of result documents found so far. After the search
* completes, the result returned by this call is identical to that of
* {@link #getDocumentsCount()}.
* @return the number of result documents known so far.
*/
public long getDocumentsCurrentCount();
/**
* Gets the ID of a result document.
* @param rank the index of the desired document in the list of documents.
* This should be a value between 0 and {@link #getDocumentsCount()} -1.
*
* If the requested document position has not yet been ranked (i.e. we know
* there is a document at that position, but we don't yet know which one) then
* the necessary ranking is performed before this method returns.
*
* @return an int value, representing the ID of the requested document.
* @throws IndexOutOfBoundsException is the index provided is less than zero,
* or greater than {@link #getDocumentsCount()} -1.
* @throws IOException
*/
public long getDocumentID(long rank) throws IndexOutOfBoundsException,
IOException;
/**
* Get the score for a given result document. The value for the score depends
* on the scorer used by the {@link QueryEngine} (see
* {@link QueryEngine#setScorerSource(java.util.concurrent.Callable)}).
* @param rank the index of the desired document in the list of documents.
* This should be a value between 0 and {@link #getDocumentsCount()} -1.
* @return
*/
public double getDocumentScore(long rank) throws IndexOutOfBoundsException,
IOException ;
/**
* Retrieves the hits within a given result document.
* @param rank the index of the desired document in the list of documents.
* This should be a value between 0 and {@link #getDocumentsCount()} -1.
*
* This method call waits until the requested data is available before
* returning (document hits are being collected by a background thread).
*
* @return
* @throws IOException
* @throws IndexOutOfBoundsException
*/
public List<Binding> getDocumentHits(long rank)
throws IndexOutOfBoundsException, IOException;
/**
* Gets a segment of the document text for a given document.
* @param rank the rank of the requested document. This should be a value
* between 0 and {@link #getDocumentsCount()} -1.
* @param termPosition the first term requested.
* @param length the number of terms requested.
* @return two parallel String arrays, one containing term text, the other
* containing the spaces in between. The first term is results[0][0], the
* space following it is results[1][0], etc.
*
* @throws IndexException
* @throws IndexOutOfBoundsException
* @throws IOException
*/
public String[][] getDocumentText(long rank, int termPosition, int length)
throws IndexException, IndexOutOfBoundsException, IOException;
/**
* Obtains the URI for a given document.
* @param rank the rank for the requested document. This should be a value
* between 0 and {@link #getDocumentsCount()} -1.
* @return the URI provided at indexing time for the document.
* @throws IndexException
* @throws IndexOutOfBoundsException
* @throws IOException
*/
public String getDocumentURI(long rank) throws IndexException,
IndexOutOfBoundsException, IOException;
/**
* Obtains the title for a given document.
* @param rank the rank of the requested document. This should be a value
* between 0 and {@link #getDocumentsCount()} -1.
* @return the document title (provided at indexing time).
* @throws IndexException
* @throws IndexOutOfBoundsException
* @throws IOException
*/
public String getDocumentTitle(long rank) throws IndexException,
IndexOutOfBoundsException, IOException;
/**
* Obtains an arbitrary document metadata field from the stored document data.
* {@link DocumentMetadataHelper}s used at indexing time can add arbitrary
* {@link Serializable} values as metadata fields for the documents being
* indexed. This method is used at search time to retrieve those values.
*
* @param rank the rank for the requested document. This should be a value
* between 0 and {@link #getDocumentsCount()} -1.
* @param fieldName the field name for which the value is sought.
* @return
* @throws IndexException
* @throws IndexOutOfBoundsException
* @throws IOException
*/
public Serializable getDocumentMetadataField(long rank, String fieldName)
throws IndexException, IndexOutOfBoundsException, IOException;
/**
* Obtains a set of arbitrary document metadata fields from the stored
* document data.
* {@link DocumentMetadataHelper}s used at indexing time can add arbitrary
* {@link Serializable} values as metadata fields for the documents being
* indexed. This method is used at search time to retrieve those values.
*
* @param rank the rank for the requested document. This should be a value
* between 0 and {@link #getDocumentsCount()} -1.
* @param fieldNames the names of the metadata fields for which the values are
* requested.
* @return a {@link Map} linking field names with their values.
* @throws IndexException
* @throws IndexOutOfBoundsException
* @throws IOException
*/
public Map<String, Serializable> getDocumentMetadataFields(long rank,
Set<String> fieldNames) throws IndexException,
IndexOutOfBoundsException, IOException;
/**
* Render the content of the given document, with the hits for this query
* highlighted.
*
* @param rank the rank for the requested document. This should be a value
* between 0 and {@link #getDocumentsCount()} -1.
* @param out an {@link Appendable} to which the output is written.
* @throws IOException
* @throws IndexException
*/
public void renderDocument(long rank, Appendable out) throws IOException,
IndexException;
/**
* Closes this {@link QueryExecutor} and releases all resources used.
* @throws IOException
*/
public void close() throws IOException;
}