// InstantiatedTermFreqVector.java example
//
// Explorer
// solrcene-master
package org.apache.lucene.store.instantiated;

import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.util.BytesRef;

import java.io.Serializable;
import java.util.Arrays;
import java.util.List;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Vector space view of a single field of a document in an
 * {@link InstantiatedIndexReader}.
 * <p>
 * The term bytes and the per-term frequencies are extracted once, at
 * construction time, from the document's vector space entry for the field and
 * are kept in two parallel arrays: element {@code i} of the frequency array
 * belongs to element {@code i} of the term array.
 *
 * @see org.apache.lucene.index.TermFreqVector
 */
public class InstantiatedTermFreqVector
    implements TermFreqVector, Serializable {

  private static final long serialVersionUID = 1L;

  /** The per-term information this vector was built from, as found on the document. */
  private final List<InstantiatedTermDocumentInformation> termDocumentInformations;

  /** Name of the field this vector describes. */
  private final String field;

  /** Term bytes, in the order of {@link #termDocumentInformations}. */
  private final BytesRef[] terms;

  /** Frequency of each term; parallel to {@link #terms}. */
  private final int[] termFrequencies;

  /**
   * Builds the vector for one field of a document.
   * <p>
   * The vector space entry for {@code field} must exist: if the lookup yields
   * {@code null}, reading its size fails with a {@link NullPointerException}.
   *
   * @param document the document whose vector space is read
   * @param field    name of the field to build the vector for
   */
  public InstantiatedTermFreqVector(InstantiatedDocument document, String field) {
    this.field = field;
    this.termDocumentInformations = document.getVectorSpace().get(field);

    final int termCount = termDocumentInformations.size();
    this.terms = new BytesRef[termCount];
    this.termFrequencies = new int[termCount];

    int slot = 0;
    for (InstantiatedTermDocumentInformation info : termDocumentInformations) {
      terms[slot] = info.getTerm().getTerm().bytes();
      // The frequency of a term is the number of positions recorded for it.
      termFrequencies[slot] = info.getTermPositions().length;
      slot++;
    }
  }

  /**
   * @return The name of the field this vector is associated with
   */
  public String getField() {
    return field;
  }

  /**
   * Renders the vector as <code>{field: term/freq, term/freq, ...}</code>.
   *
   * @return a human readable listing of every term and its frequency
   */
  @Override
  public String toString() {
    final StringBuilder out = new StringBuilder("{").append(field).append(": ");
    if (terms != null) {
      String separator = "";
      for (int i = 0; i < terms.length; i++) {
        out.append(separator)
            .append(terms[i])
            .append('/')
            .append(termFrequencies[i]);
        separator = ", ";
      }
    }
    return out.append('}').toString();
  }

  /**
   * @return the number of terms in this vector
   */
  public int size() {
    if (terms == null) {
      return 0;
    }
    return terms.length;
  }

  /**
   * @return the internal term array itself (not a copy)
   */
  public BytesRef[] getTerms() {
    return terms;
  }

  /**
   * @return the internal frequency array itself (not a copy), parallel to
   *         {@link #getTerms()}
   */
  public int[] getTermFrequencies() {
    return termFrequencies;
  }

  /**
   * Locates a term in this vector using a binary search over the term array.
   * <p>
   * NOTE(review): a binary search presumes the term array is sorted in the
   * natural order of {@code BytesRef}; that ordering comes from the document's
   * vector space and is not established in this class - confirm.
   *
   * @param termText the term to look up
   * @return the index of the term, or {@code -1} if it was not found
   */
  public int indexOf(BytesRef termText) {
    if (terms == null) {
      return -1;
    }
    final int position = Arrays.binarySearch(terms, termText);
    if (position < 0) {
      // Negative values encode an insertion point; callers only get "not found".
      return -1;
    }
    return position;
  }

  /**
   * Looks up a slice of terms, one {@link #indexOf(BytesRef)} call per term.
   *
   * @param termNumbers the terms to look up
   * @param start       index of the first element of {@code termNumbers} to use
   * @param len         number of consecutive elements to look up
   * @return an array of length {@code len}; element {@code i} is the index of
   *         {@code termNumbers[start + i]} in this vector, or {@code -1} if
   *         that term is absent
   */
  public int[] indexesOf(BytesRef[] termNumbers, int start, int len) {
    // TODO: there must be a more efficient way of doing this.
    //       At least, we could advance the lower bound of the terms array
    //       as we find valid indices. Also, it might be possible to leverage
    //       this even more by starting in the middle of the termNumbers array
    //       and thus dividing the terms array maybe in half with each found index.
    final int[] positions = new int[len];
    for (int offset = 0; offset < len; offset++) {
      positions[offset] = indexOf(termNumbers[start + offset]);
    }
    return positions;
  }

  /**
   * @return the list of per-term information this vector was built from (the
   *         same list instance, not a copy)
   */
  public List<InstantiatedTermDocumentInformation> getTermDocumentInformations() {
    return termDocumentInformations;
  }

}