// TermVectorAccessor.java example (Explorer: solrcene-master)
package org.apache.lucene.index;

/*
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


/**
 * Transparent access to the vector space model,
 * either via a stored TermFreqVector or by resolving it from the inverted index.
 * <p/>
 * Resolving a term vector from a large index can be a time-consuming process.
 * <p/>
 * Warning! This class is not thread-safe!
 */
public class TermVectorAccessor {

  public TermVectorAccessor() {
  }

  /**
   * Instance reused to save garbage collector some time.
   * It wraps the caller's mapper during {@link #accept} and records whether the
   * index reader delivered a term vector to it.
   */
  private final TermVectorMapperDecorator decoratedMapper = new TermVectorMapperDecorator();


  /**
   * Visits the TermVectorMapper and populates it with terms available for a given document,
   * either via a vector created at index time or by resolving them from the inverted index.
   * <p/>
   * The reader is first asked for a term vector through a decorating mapper. If the reader
   * never calls {@code setExpectations} on it, no vector was delivered and the terms are
   * rebuilt from the postings instead.
   *
   * @param indexReader    Index source
   * @param documentNumber Source document to access
   * @param fieldName      Field to resolve
   * @param mapper         Mapper to be mapped with data
   * @throws IOException   propagated from the index reader or the postings enumerations
   */
  public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException {

    fieldName = StringHelper.intern(fieldName);

    decoratedMapper.decorated = mapper;
    decoratedMapper.termVectorStored = false;

    indexReader.getTermFreqVector(documentNumber, fieldName, decoratedMapper);

    if (!decoratedMapper.termVectorStored) {
      // The reader did not hand over a term vector: resolve it from the inverted index.
      mapper.setDocumentNumber(documentNumber);
      build(indexReader, fieldName, mapper, documentNumber);
    }
  }

  /** Instance reused to save garbage collector some time */
  private List<BytesRef> tokens;

  /** Instance reused to save garbage collector some time */
  private List<int[]> positions;

  /** Instance reused to save garbage collector some time */
  private List<Integer> frequencies;


  /**
   * Populates the mapper with terms available for the given field in a document
   * by resolving the inverted index.
   * <p/>
   * Every term of the field is enumerated and its postings are advanced to the requested
   * document; matching terms are buffered and then replayed into the mapper. Offsets are
   * never available on this path, so {@code null} is always passed for them.
   * <p/>
   * If the mapper wants positions but the postings of a matching term provide none
   * ({@code docsAndPositions} returned {@code null}), that term is mapped with
   * {@code null} positions and the mapper is told that positions are not stored,
   * instead of failing with a {@link NullPointerException}.
   *
   * @param indexReader    index to resolve the terms from
   * @param field          interned field name
   * @param mapper         mapper receiving the resolved terms
   * @param documentNumber document whose terms are collected
   * @throws IOException   propagated from the terms and postings enumerations
   */
  private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {

    if (tokens == null) {
      tokens = new ArrayList<BytesRef>(500);
      positions = new ArrayList<int[]>(500);
      frequencies = new ArrayList<Integer>(500);
    } else {
      tokens.clear();
      frequencies.clear();
      positions.clear();
    }

    final Bits delDocs = MultiFields.getDeletedDocs(indexReader);

    final Terms terms = MultiFields.getTerms(indexReader, field);
    if (terms == null) {
      // The field has no terms at all: nothing to report.
      return;
    }

    final boolean wantPositions = !mapper.isIgnoringPositions();

    // Set when a matching term had to be recorded without positions although the mapper asked for them.
    boolean positionsMissing = false;
    boolean anyTerms = false;

    final TermsEnum termsEnum = terms.iterator();

    // Separate reuse slots so that each enum kind is only ever offered an instance of its own kind.
    DocsEnum docsReuse = null;
    DocsAndPositionsEnum postingsReuse = null;

    BytesRef text;
    while ((text = termsEnum.next()) != null) {
      anyTerms = true;

      DocsAndPositionsEnum postings = null;
      if (wantPositions) {
        postings = termsEnum.docsAndPositions(delDocs, postingsReuse);
        if (postings != null) {
          postingsReuse = postings;
        }
      }

      final DocsEnum docs;
      if (postings != null) {
        docs = postings;
      } else {
        // Positions were not requested, or are not available for this term.
        docsReuse = termsEnum.docs(delDocs, docsReuse);
        docs = docsReuse;
      }

      if (docs.advance(documentNumber) != documentNumber) {
        // The term does not occur in the requested document.
        continue;
      }

      final int freq = docs.freq();
      frequencies.add(Integer.valueOf(freq));
      // Copy the term, the instance returned by the enum is not kept as-is.
      tokens.add(new BytesRef(text));

      if (postings != null) {
        final int[] termPositions = new int[freq];
        for (int i = 0; i < termPositions.length; i++) {
          termPositions[i] = postings.nextPosition();
        }
        positions.add(termPositions);
      } else {
        positions.add(null);
        if (wantPositions) {
          positionsMissing = true;
        }
      }
    }

    if (anyTerms) {
      // accept() has already announced the document number; it is repeated here to keep
      // the sequence of mapper calls unchanged.
      mapper.setDocumentNumber(documentNumber);
      mapper.setExpectations(field, tokens.size(), false, wantPositions && !positionsMissing);
      for (int i = 0; i < tokens.size(); i++) {
        mapper.map(tokens.get(i), frequencies.get(i).intValue(), (TermVectorOffsetInfo[]) null, positions.get(i));
      }
    }
  }


  /**
   * Forwards every call to the wrapped mapper and remembers whether
   * {@link #setExpectations} was invoked, which {@link TermVectorAccessor#accept}
   * takes as the sign that the reader delivered a term vector.
   */
  private static class TermVectorMapperDecorator extends TermVectorMapper {

    /** Mapper supplied by the caller of accept; replaced on every call. */
    private TermVectorMapper decorated;

    @Override
    public boolean isIgnoringPositions() {
      return decorated.isIgnoringPositions();
    }

    @Override
    public boolean isIgnoringOffsets() {
      return decorated.isIgnoringOffsets();
    }

    /** Reset by accept before each lookup; flipped to true by setExpectations. */
    private boolean termVectorStored = false;

    @Override
    public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
      decorated.setExpectations(field, numTerms, storeOffsets, storePositions);
      termVectorStored = true;
    }

    @Override
    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
      decorated.map(term, frequency, offsets, positions);
    }

    @Override
    public void setDocumentNumber(int documentNumber) {
      decorated.setDocumentNumber(documentNumber);
    }
  }

}