TermQuery.java example

Explorer

mimir-master
- etc
  - generated-header.java
- mimir-client
  - src
    - gate
      - mimir
        index
        MimirConnector.java
        MimirIndexingPR.java
        search
        RemoteQueryRunner.java
        tool
        WebUtils.java
- mimir-cloud
  - archive-unpacker
    - src
      - gate
        mimir
        util
        MultiFileInputStream.java
        UnpackWizard.java
  - src
    - java
      - gate
        mimir
        util
        IndexArchiveState.java
        MultiFileOutputStream.java
- mimir-core
  - src
    - gate
      - mimir
        AbstractSemanticAnnotationHelper.java
        Constraint.java
        ConstraintType.java
        DocumentMetadataHelper.java
        DocumentRenderer.java
        IndexConfig.java
        MimirIndex.java
        SemanticAnnotationHelper.java
        index
        AtomicAnnotationIndex.java
        AtomicIndex.java
        AtomicTokenIndex.java
        DocumentCollection.java
        DocumentData.java
        GATEDocument.java
        GATEDocumentFactory.java
        IndexException.java
        Mention.java
        OriginalMarkupMetadataHelper.java
        package-info.java
        search
        FederatedQueryRunner.java
        IndexReaderPool.java
        QueryEngine.java
        QueryRunner.java
        RankingQueryRunnerImpl.java
        query
        AbstractIntersectionQueryExecutor.java
        AbstractOverlapQuery.java
        AbstractQueryExecutor.java
        AndQuery.java
        AnnotationQuery.java
        Binding.java
        ConstQuery.java
        ContainsQuery.java
        ExecutorsList.java
        GapQuery.java
        MinusQuery.java
        OrQuery.java
        QueryExecutor.java
        QueryNode.java
        RepeatsQuery.java
        SequenceQuery.java
        TermQuery.java
        WithinQuery.java
        parser
        ParseException.java
        Query.java
        QueryParser.java
        QueryParserConstants.java
        QueryParserTokenManager.java
        SimpleCharStream.java
        Token.java
        TokenMgrError.java
        score
        BindingScorer.java
        DelegatingScoringQueryExecutor.java
        MimirScorer.java
        terms
        AbstractCompoundTermsQuery.java
        AbstractDocumentsBasedTermsQuery.java
        AbstractIndexTermsQuery.java
        AndTermsQuery.java
        AnnotationTermsQuery.java
        CompoundTermsQuery.java
        ConstTermsQuery.java
        DocumentTermsQuery.java
        DocumentsAndTermsQuery.java
        DocumentsBasedTermsQuery.java
        DocumentsOrTermsQuery.java
        LimitTermsQuery.java
        OrTermsQuery.java
        SortedTermsQuery.java
        TermTypeTermsQuery.java
        TermsQuery.java
        TermsResultSet.java
        util
        DefaultMentionDescriber.java
        DelegatingSemanticAnnotationHelper.java
        DocumentFeaturesMetadataHelper.java
        IgnoreEmptiesTermProcessor.java
        IndexUpgrader.java
        MG4JTools.java
        NormalizingTermProcessor.java
        OntologyMentionDescriber.java
        TruncateIndex.java
- mimir-test
  - src
    - gate
      - mimir
        test
        QueryTests.java
        RenderZipCollection.java
        Scratch.java
        ScratchConsole.java
        TestQueryParser.java
        TestUtils.java
- mimir-web
  - src
    - gwt
      - gate
        mimir
        web
        client
        UI.java
    - java
      - gate
        mimir
        util
        LogAnalyser.java
        web
        client
        DocumentData.java
        GwtRpcService.java
        GwtRpcServiceAsync.java
        MimirSearchException.java
        ResultsData.java
- plugins
  - db-h2
    - src
      - gate
        mimir
        db
        AnnotationTemplateCache.java
        DBSemanticAnnotationHelper.java
  - measurements
    - src
      - gate
        mimir
        measurements
        MeasurementAnnotationHelper.java
        MeasurementPluginResource.java
  - sparql
    - src
      - gate
        mimir
        sparql
        RequestMethod.java
        SPARQLResultSet.java
        SPARQLSemanticAnnotationHelper.java

/*
 *  TermQuery.java
 *
 *  Copyright (c) 2007-2011, The University of Sheffield.
 *
 *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html), 
 *  and is free software, licenced under the GNU Lesser General Public License,
 *  Version 3, June 2007 (also included with this distribution as file
 *  LICENCE-LGPL3.html).
 *
 *  Valentin Tablan, 03 Mar 2009
 *  
 *  $Id$
 */

package gate.mimir.search.query;

import gate.mimir.IndexConfig;
import gate.mimir.index.AtomicIndex;
import gate.mimir.search.IndexReaderPool;
import gate.mimir.search.QueryEngine;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.di.big.mg4j.index.Index;
import it.unimi.di.big.mg4j.index.IndexIterator;
import it.unimi.di.big.mg4j.index.IndexIterators;
import it.unimi.di.big.mg4j.index.IndexReader;
import it.unimi.di.big.mg4j.index.payload.Payload;
import it.unimi.di.big.mg4j.search.DocumentIterator;
import it.unimi.di.big.mg4j.search.IntervalIterator;
import it.unimi.di.big.mg4j.search.visitor.DocumentIteratorVisitor;

import java.io.IOException;

import static gate.mimir.search.QueryEngine.IndexType;

/**
 * A {@link QueryNode} for term queries. A term query consists of an index name 
 * and a query term. 
 */
public class TermQuery implements QueryNode {

  private static final long serialVersionUID = 7302348587893649887L;

  /**
   * The query term
   */
  private String term;
  
  /**
   * The term ID for this query. If not known, 
   * {@link DocumentIterator#END_OF_LIST} is used.
   */
  private long termId = DocumentIterator.END_OF_LIST;
  
  
  /**
   * The name of the index to search.
   */
  private String indexName;
  
  /**
   * The type of the index to be searched.
   */
  private IndexType indexType;
  
  /**
   * The length of the matches. Defaults to <code>1</code>.
   */
  private int length;
  
  
  /**
   * A {@link QueryExecutor} for {@link TermQuery} nodes.
   */
  public static class TermQueryExecutor extends AbstractQueryExecutor implements IndexIterator{

    
    /**
     * The {@link TermQuery} node being executed.
     */
    private TermQuery query;
    
    /**
     * A local reference to the {@link IndexReaderPool} from the 
     * {@link QueryEngine}.
     */
    private AtomicIndex atomicIndex;
    
    /**
     * The {@link IndexReader} from the {@link #atomicIndex}.
     */
    private IndexReader indexReader;
    
    /**
     * The index iterator used to obtain hits. 
     */
    private IndexIterator indexIterator;
    
    /**
     * The positions iterator for the latest document.
     */
    private IntIterator positionsIterator;
    
    
    /**
     * @param node
     * @param invertedIndex
     * @throws IOException if the index files cannot be accessed.
     */
    public TermQueryExecutor(TermQuery node, QueryEngine engine) throws IOException {
      super(engine, node);
      this.query = node;
      atomicIndex = query.getIndex(engine);

      if(atomicIndex == null) throw new IllegalArgumentException(
              "No index provided for field " + node.getIndexName() + "!");
      Index mg4jIndex = atomicIndex.getIndex();
      if(mg4jIndex != null) {
        indexReader = mg4jIndex.getReader();      
        // if we have the term ID, use that
        if(query.termId != DocumentIterator.END_OF_LIST) {
          this.indexIterator = indexReader.documents(query.termId);
          // set the term (used by rankers)
          MutableString mutableString = new MutableString(query.getTerm());
          atomicIndex.getIndex().termProcessor.processTerm(mutableString);
          this.indexIterator.term(mutableString);
        } else {
          //use the term processor for the query term
          MutableString mutableString = new MutableString(query.getTerm());
          atomicIndex.getIndex().termProcessor.processTerm(mutableString);
          this.indexIterator = indexReader.documents(mutableString.toString());        
        }        
      } else {
        // the atomic index is empty: we have exhausted the search already
        latestDocument = -1;
      }

      positionsIterator = null;
    }

    /* (non-Javadoc)
     * @see gate.mimir.search.query.QueryExecutor#nextDocument()
     */
    public long nextDocument(long from) throws IOException {
      if(closed) return latestDocument = -1;
      if(latestDocument == -1){
        //we have exhausted the search already
        return latestDocument;
      }
      
      if (from >= latestDocument){
        //we do need to skip
        latestDocument = indexIterator.skipTo(from + 1);
      }else{
        //from is lower than latest document, 
        //so we just return the next document
        latestDocument = indexIterator.nextDocument();
      }
      if(latestDocument == DocumentIterator.END_OF_LIST){
        //no more documents available
        latestDocument = -1;
      } else {
        positionsIterator = IndexIterators.positionIterator(indexIterator);
      }
      return latestDocument;
    }

    /* (non-Javadoc)
     * @see gate.mimir.search.query.QueryExecutor#nextHit(java.util.Map)
     */
    public Binding nextHit() throws IOException{
      if(closed) return null;
      if(positionsIterator == null) positionsIterator = 
          IndexIterators.positionIterator(indexIterator);
      if(latestDocument >= 0 && positionsIterator.hasNext()){
        int position = positionsIterator.nextInt();
        return new Binding(query, latestDocument, position, query.length, null);
      }else{
        //no more positions, or no more documents
        return null;
      }
    }

    /* (non-Javadoc)
     * @see gate.mimir.search.query.QueryExecutor#close()
     */
    public void close() throws IOException {
      if(closed) return;
      super.close();
      indexIterator = null;
      if(indexReader != null) indexReader.close();
//      atomicIndex.returnReader(indexReader);
    }

    
    /* (non-Javadoc)
     * @see it.unimi.di.big.mg4j.index.IndexIterator#nextPosition()
     */
    @Override
    public int nextPosition() throws IOException {
      // TODO Auto-generated method stub
      throw new UnsupportedOperationException("Method not implemented!");
    }

    public boolean hasNext() {
      throw new UnsupportedOperationException("Method not implemented!");
    }

    public Integer next() {
      throw new UnsupportedOperationException("Method not implemented!");
    }

    public void remove() {
      throw new UnsupportedOperationException("Method not implemented!");
    }

    public Index index() {
      return indexIterator.index();
    }

    public IntervalIterator intervalIterator() throws IOException {
      return indexIterator.intervalIterator();
    }

    public long frequency() throws IOException {
      return indexIterator.frequency();
    }

    public IntervalIterator intervalIterator(Index index) throws IOException {
      return indexIterator.intervalIterator(index);
    }

    public Payload payload() throws IOException {
      return indexIterator.payload();
    }

    public int count() throws IOException {
      return indexIterator.count();
    }

    public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
      throws IOException {
      return indexIterator.intervalIterators();
    }

    public ReferenceSet<Index> indices() {
      return indexIterator.indices();
    }

    public IndexIterator id(int id) {
      return indexIterator.id(id);
    }

    public long nextDocument() throws IOException {
      return indexIterator.nextDocument();
    }

    public int id() {
      return indexIterator.id();
    }

    public long document() {
      return indexIterator.document();
    }

    public <T> T accept(DocumentIteratorVisitor<T> visitor) throws IOException {
      return indexIterator.accept(visitor);
    }

    public <T> T acceptOnTruePaths(DocumentIteratorVisitor<T> visitor)
      throws IOException {
      return indexIterator.acceptOnTruePaths(visitor);
    }

    public void dispose() throws IOException {
      indexIterator.dispose();
    }

    public long termNumber() {
      return indexIterator.termNumber();
    }

    public String term() {
      return indexIterator.term();
    }

    public IndexIterator term(CharSequence term) {
      return indexIterator.term(term);
    }

    public IndexIterator weight(double weight) {
      return indexIterator.weight(weight);
    }

    public long skipTo(long n) throws IOException {
      return indexIterator.skipTo(n);
    }

    public double weight() {
      return indexIterator.weight();
    }
    
  }
  
  /**
   * @return the term
   */
  public CharSequence getTerm() {
    return term;
  }
  
  /**
   * @return the termId
   */
  public long getTermId() {
    return termId;
  }

  /**
   * @return the indexName
   */
  public String getIndexName() {
    return indexName;
  }
  
  /**
   * Gets the index for this query in a given {@link QueryEngine}.
   * @param engine
   * @return
   */
  public AtomicIndex getIndex(QueryEngine engine) {
    switch(this.indexType){
      case TOKENS:
        return engine.getTokenIndex(indexName);
      case ANNOTATIONS:
        return engine.getAnnotationIndex(indexName);
      default:
        throw new IllegalArgumentException("Indexes of type " + 
                indexType + " are not supported!"); 
    }
  }
  
  
  /**
   * Creates a new term query, for searching over the document text. 
   * 
   * @param indexName the name of the index to be searched. This should be one
   * of the annotation feature names used for indexing tokens (see 
   * {@link IndexConfig.TokenIndexerConfig}).
   * 
   * @param term the term to be searched for.
   * 
   * @see IndexConfig.TokenIndexerConfig
   */
  public TermQuery(String indexName, String term) {
    this(IndexType.TOKENS, indexName, term, 1);
  }
  
  /**
   * Creates a new term query, for searching over the document text. 
   * 
   * @param indexName the name of the index to be searched. This should be one
   * of the annotation feature names used for indexing tokens (see 
   * {@link IndexConfig.TokenIndexerConfig}).
   * 
   * @param termId the term ID for the term to be searched for.
   * 
   * @see IndexConfig.TokenIndexerConfig
   */
  public TermQuery(String indexName, String term, long termId) {
    this(IndexType.TOKENS, indexName, term, termId, 1);
  }
  
  /**
   * Creates a new term query, for searching over semantic annotations.
   *   
   * @param annotationType the type of annotation sought. This should one of the 
   * annotation types used when indexing semantic annotations (see 
   * {@link IndexConfig.SemanticIndexerConfig}).
   * 
   * @param mentionURI the URI of the mention sought.
   * 
   * @param length the length of the mention sought.
   */
  public TermQuery(String annotationType, String mentionURI, int length) {
    this(IndexType.ANNOTATIONS, annotationType, mentionURI, length);
  }
  
  /**
   * Creates a new term query, for searching over semantic annotations.
   *   
   * @param annotationType the type of annotation sought. This should one of the 
   * annotation types used when indexing semantic annotations (see 
   * {@link IndexConfig.SemanticIndexerConfig}).
   * 
   * @param mentionTermid the term ID for the mentionURI sought.
   * 
   * @param length the length of the mention sought.
   */
  public TermQuery(String annotationType, String term, long mentionTermid, int length) {
    this(IndexType.ANNOTATIONS, annotationType, term, mentionTermid, length);
  }  
  
  /**
   * Creates a new term query. This constructor is part of a low-level API. see 
   * the other constructors of this class, which may be more suitable!
   *   
   * @param indexType The type of index to be searched.
   * 
   * @param indexName the name of the index to be searched. If the indexType is
   * {@link IndexType#TOKENS}, then the name is interpreted as the feature name 
   * for the document tokens, if the indexType is {@link IndexType#ANNOTATIONS}, 
   * then the name is interpreted as annotation type.
   * 
   * @param term the term to be searched for.
   * 
   * @param length the length of the hits (useful in the case of annotation 
   * indexes, where the length of each mention is stored external to the actual 
   * index).
   */
  public TermQuery(IndexType indexType, String indexName, String term, int length) {
    this.indexType = indexType;
    this.indexName = indexName;
    this.term = term;
    this.length = length;
  }
  
  /**
   * Creates a new term query. This constructor is part of a low-level API. see 
   * the other constructors of this class, which may be more suitable!
   *   
   * @param indexType The type of index to be searched.
   * 
   * @param indexName the name of the index to be searched. If the indexType is
   * {@link IndexType#TOKENS}, then the name is interpreted as the feature name 
   * for the document tokens, if the indexType is {@link IndexType#ANNOTATIONS}, 
   * then the name is interpreted as annotation type.
   * 
   * @param length the length of the hits (useful in the case of annotation 
   * indexes, where the length of each mention is stored external to the actual 
   * index).
   * 
   * @param termId the term ID for sought term.
   */
  public TermQuery(IndexType indexType, String indexName, String term, long termId, int length) {
    this.indexType = indexType;
    this.indexName = indexName;
    this.termId = termId;
    this.term = term;
    this.length = length;
  }
  
  
  
  /**
   * Gets a new query executor for this {@link TermQuery}.
   * @param indexes the set of indexes running on.
   * @return an appropriate {@link QueryExecutor} (in this case, an instance of
   * {@link TermQueryExecutor}).
   * @throws IOException if the index files cannot be accessed.
   * @throws IllegalArgumentException if the provided set of indexes does not
   * include an index for this query's {@link #indexName}.
   * @see gate.mimir.search.query.QueryNode#getQueryExecutor(java.util.Map)
   */
  public QueryExecutor getQueryExecutor(QueryEngine engine) throws IOException {
    return new TermQueryExecutor(this, engine);
  }
  
  public String toString() {
    return "TERM(" + 
        (indexName == null ? "" : indexName) + 
        ":" + term + ")";
  }

  public IndexType getIndexType() {
    return indexType;
  }

  public int getLength() {
    return length;
  }
}