package org.apache.lucene.index.codecs.preflex;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.IndexInput;

/** @lucene.experimental */
public final class SegmentTermPositions extends SegmentTermDocs {
  private IndexInput proxStream;
  private IndexInput proxStreamOrig;
  private int proxCount;
  private int position;

  // the current payload length
  private int payloadLength;
  // indicates whether the payload of the current position has
  // been read from the proxStream yet
  private boolean needToLoadPayload;

  // these variables are being used to remember information
  // for a lazy skip
  private long lazySkipPointer = -1;
  private int lazySkipProxCount = 0;

  /*
  SegmentTermPositions(SegmentReader p) {
    super(p);
    this.proxStream = null;  // the proxStream will be cloned lazily when nextPosition() is called for the first time
  }
  */

  public SegmentTermPositions(IndexInput freqStream, IndexInput proxStream, TermInfosReader tis, FieldInfos fieldInfos) {
    super(freqStream, tis, fieldInfos);
    this.proxStreamOrig = proxStream;  // the proxStream will be cloned lazily when nextPosition() is called for the first time
  }

  final void seek(TermInfo ti, Term term) throws IOException {
    super.seek(ti, term);
    if (ti != null)
      lazySkipPointer = ti.proxPointer;

    lazySkipProxCount = 0;
    proxCount = 0;
    payloadLength = 0;
    needToLoadPayload = false;
  }

  public final void close() throws IOException {
    super.close();
    if (proxStream != null) proxStream.close();
  }

  public final int nextPosition() throws IOException {
    if (currentFieldOmitTermFreqAndPositions)
      // This field does not store term freq, positions, payloads
      return 0;
    // perform lazy skips if necessary
    lazySkip();
    proxCount--;
    return position += readDeltaPosition();
  }

  private final int readDeltaPosition() throws IOException {
    int delta = proxStream.readVInt();
    if (currentFieldStoresPayloads) {
      // if the current field stores payloads then
      // the position delta is shifted one bit to the left.
      // if the LSB is set, then we have to read the current
      // payload length
      if ((delta & 1) != 0) {
        payloadLength = proxStream.readVInt();
      }
      delta >>>= 1;
      needToLoadPayload = true;
    }
    return delta;
  }

  protected final void skippingDoc() throws IOException {
    // we remember to skip a document lazily
    lazySkipProxCount += freq;
  }

  public final boolean next() throws IOException {
    // we remember to skip the remaining positions of the current
    // document lazily
    lazySkipProxCount += proxCount;

    if (super.next()) {               // run super
      proxCount = freq;               // note frequency
      position = 0;                   // reset position
      return true;
    }
    return false;
  }

  public final int read(final int[] docs, final int[] freqs) {
    throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
  }

  /** Called by super.skipTo(). */
  protected void skipProx(long proxPointer, int payloadLength) throws IOException {
    // we save the pointer, we might have to skip there lazily
    lazySkipPointer = proxPointer;
    lazySkipProxCount = 0;
    proxCount = 0;
    this.payloadLength = payloadLength;
    needToLoadPayload = false;
  }

  private void skipPositions(int n) throws IOException {
    assert !currentFieldOmitTermFreqAndPositions;
    for (int f = n; f > 0; f--) {        // skip unread positions
      readDeltaPosition();
      skipPayload();
    }
  }

  private void skipPayload() throws IOException {
    if (needToLoadPayload && payloadLength > 0) {
      proxStream.seek(proxStream.getFilePointer() + payloadLength);
    }
    needToLoadPayload = false;
  }

  // It is not always necessary to move the prox pointer
  // to a new document after the freq pointer has been moved.
  // Consider for example a phrase query with two terms:
  // the freq pointer for term 1 has to move to document x
  // to answer the question of whether the term occurs in that document.
  // But the positions only have to be read if term 2 also matches
  // document x, to figure out whether term 1 and term 2 appear next
  // to each other in document x and thus satisfy the query.
  // So we move the prox pointer lazily to the document
  // as soon as positions are requested.
  private void lazySkip() throws IOException {
    if (proxStream == null) {
      // clone lazily
      proxStream = (IndexInput) proxStreamOrig.clone();
    }

    // we might have to skip the current payload
    // if it was not read yet
    skipPayload();

    if (lazySkipPointer != -1) {
      proxStream.seek(lazySkipPointer);
      lazySkipPointer = -1;
    }

    if (lazySkipProxCount != 0) {
      skipPositions(lazySkipProxCount);
      lazySkipProxCount = 0;
    }
  }

  public int getPayloadLength() {
    return payloadLength;
  }

  public byte[] getPayload(byte[] data, int offset) throws IOException {
    if (!needToLoadPayload) {
      throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
    }

    // read payloads lazily
    byte[] retArray;
    int retOffset;
    if (data == null || data.length - offset < payloadLength) {
      // the array is too small to store the payload data,
      // so we allocate a new one
      retArray = new byte[payloadLength];
      retOffset = 0;
    } else {
      retArray = data;
      retOffset = offset;
    }
    proxStream.readBytes(retArray, retOffset, payloadLength);
    needToLoadPayload = false;
    return retArray;
  }

  public boolean isPayloadAvailable() {
    return needToLoadPayload && payloadLength > 0;
  }
}
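
/*
 * Illustrative usage sketch (not part of the original class, kept commented out
 * like the constructor sketch above): roughly how the surrounding pre-flex codec
 * code drives this class -- seek to a term, walk the matching documents, and only
 * then pull positions and payloads, which is what triggers the lazy clone and seek
 * of the prox stream in lazySkip(). The variables freqStream, proxStream, tis,
 * fieldInfos and term are assumed to be set up by the caller; tis.get(term) and
 * the freq() accessor inherited from SegmentTermDocs are assumptions about the
 * neighbouring pre-flex classes, and seek(TermInfo, Term) is package-private, so
 * this sketch only applies inside the codec package. IOException handling omitted.
 *
 *   SegmentTermPositions tp = new SegmentTermPositions(freqStream, proxStream, tis, fieldInfos);
 *   try {
 *     tp.seek(tis.get(term), term);            // position on a term
 *     while (tp.next()) {                      // advance to the next matching document
 *       int f = tp.freq();                     // number of positions in this document
 *       for (int i = 0; i < f; i++) {
 *         int pos = tp.nextPosition();         // lazy prox-stream clone/seek happens here
 *         if (tp.isPayloadAvailable()) {
 *           byte[] payload = tp.getPayload(null, 0);   // null buffer: a new array is allocated
 *         }
 *       }
 *     }
 *   } finally {
 *     tp.close();
 *   }
 */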