/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.lucene50;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_ENCODED_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.VERSION_START;

/**
 * Concrete class that reads docId (and optionally freq, position, offset and
 * payload) lists written with the block postings format.
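 *
 * <p>A minimal, illustrative sketch of how a codec might construct this reader
 * ({@code state} is the {@link SegmentReadState} handed to the codec; the
 * terms dictionary this reader is paired with is chosen by the enclosing
 * postings format, not here):
 *
 * <pre class="prettyprint">
 * PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
 * // ... hand postingsReader to a terms dictionary; on failure close it:
 * // IOUtils.closeWhileHandlingException(postingsReader);
 * </pre>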
 *
 * @lucene.experimental
 */
public final class Lucene50PostingsReader extends PostingsReaderBase {

  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Lucene50PostingsReader.class);

  private final IndexInput docIn;
  private final IndexInput posIn;
  private final IndexInput payIn;

  final ForUtil forUtil;
  private int version;

  /** Sole constructor. */
  public Lucene50PostingsReader(SegmentReadState state) throws IOException {
    boolean success = false;
    IndexInput docIn = null;
    IndexInput posIn = null;
    IndexInput payIn = null;

    // NOTE: these data files are too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.

    String docName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.DOC_EXTENSION);
    try {
      docIn = state.directory.openInput(docName, state.context);
      version = CodecUtil.checkIndexHeader(docIn, DOC_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      forUtil = new ForUtil(docIn);
      CodecUtil.retrieveChecksum(docIn);

      if (state.fieldInfos.hasProx()) {
        String proxName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.POS_EXTENSION);
        posIn = state.directory.openInput(proxName, state.context);
        CodecUtil.checkIndexHeader(posIn, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
        CodecUtil.retrieveChecksum(posIn);

        if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
          String payName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.PAY_EXTENSION);
          payIn = state.directory.openInput(payName, state.context);
          CodecUtil.checkIndexHeader(payIn, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
          CodecUtil.retrieveChecksum(payIn);
        }
      }

      this.docIn = docIn;
      this.posIn = posIn;
      this.payIn = payIn;
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(docIn, posIn, payIn);
      }
    }
  }

  @Override
  public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
    // Make sure we are talking to the matching postings writer
    CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    final int indexBlockSize = termsIn.readVInt();
    if (indexBlockSize != BLOCK_SIZE) {
      throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
    }
  }

  /**
   * Read values that have been written using variable-length encoding instead of bit-packing.
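   *
   * <p>When freqs are indexed, doc deltas and freqs share one vInt stream:
   * each delta is shifted left by one bit and the low bit flags
   * {@code freq == 1} (the common case). For example, delta 5 with freq 1 is
   * written as {@code (5 << 1) | 1 = 11}, while delta 5 with freq 3 is written
   * as {@code 5 << 1 = 10} followed by the vInt {@code 3}.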
   */
  static void readVIntBlock(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
    if (indexHasFreq) {
      for(int i=0;i<num;i++) {
        final int code = docIn.readVInt();
        docBuffer[i] = code >>> 1;
        if ((code & 1) != 0) {
          freqBuffer[i] = 1;
        } else {
          freqBuffer[i] = docIn.readVInt();
        }
      }
    } else {
      for(int i=0;i<num;i++) {
        docBuffer[i] = docIn.readVInt();
      }
    }
  }

  @Override
  public BlockTermState newTermState() {
    return new IntBlockTermState();
  }

  @Override
  public void close() throws IOException {
    IOUtils.close(docIn, posIn, payIn);
  }

  @Override
  public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException {
    final IntBlockTermState termState = (IntBlockTermState) _termState;
    final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    final boolean fieldHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    final boolean fieldHasPayloads = fieldInfo.hasPayloads();

    if (absolute) {
      termState.docStartFP = 0;
      termState.posStartFP = 0;
      termState.payStartFP = 0;
    }

    termState.docStartFP += longs[0];
    if (fieldHasPositions) {
      termState.posStartFP += longs[1];
      if (fieldHasOffsets || fieldHasPayloads) {
        termState.payStartFP += longs[2];
      }
    }
    if (termState.docFreq == 1) {
      termState.singletonDocID = in.readVInt();
    } else {
      termState.singletonDocID = -1;
    }
    if (fieldHasPositions) {
      if (termState.totalTermFreq > BLOCK_SIZE) {
        termState.lastPosBlockOffset = in.readVLong();
      } else {
        termState.lastPosBlockOffset = -1;
      }
    }
    if (termState.docFreq > BLOCK_SIZE) {
      termState.skipOffset = in.readVLong();
    } else {
      termState.skipOffset = -1;
    }
  }

  @Override
  public PostingsEnum postings(FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags) throws IOException {

    boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    boolean indexHasPayloads = fieldInfo.hasPayloads();

    if (indexHasPositions == false || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) {
      BlockDocsEnum docsEnum;
      if (reuse instanceof BlockDocsEnum) {
        docsEnum = (BlockDocsEnum) reuse;
        if (!docsEnum.canReuse(docIn, fieldInfo)) {
          docsEnum = new BlockDocsEnum(fieldInfo);
        }
      } else {
        docsEnum = new BlockDocsEnum(fieldInfo);
      }
      return docsEnum.reset((IntBlockTermState) termState, flags);
    } else if ((indexHasOffsets == false || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false) &&
               (indexHasPayloads == false || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
      BlockPostingsEnum docsAndPositionsEnum;
      if (reuse instanceof BlockPostingsEnum) {
        docsAndPositionsEnum = (BlockPostingsEnum) reuse;
        if (!docsAndPositionsEnum.canReuse(docIn, fieldInfo)) {
          docsAndPositionsEnum = new BlockPostingsEnum(fieldInfo);
        }
      } else {
        docsAndPositionsEnum = new BlockPostingsEnum(fieldInfo);
      }
      return docsAndPositionsEnum.reset((IntBlockTermState) termState);
    } else {
      EverythingEnum everythingEnum;
      if (reuse instanceof EverythingEnum) {
        everythingEnum = (EverythingEnum) reuse;
        if (!everythingEnum.canReuse(docIn, fieldInfo)) {
          everythingEnum = new EverythingEnum(fieldInfo);
        }
      } else {
        everythingEnum = new EverythingEnum(fieldInfo);
      }
      return everythingEnum.reset((IntBlockTermState) termState, flags);
    }
  }
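  // Enum used when only docs, or docs + freqs, are requested: positions,
  // offsets and payloads are never touched.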
  final class BlockDocsEnum extends PostingsEnum {
    private final byte[] encoded;

    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
    private final int[] freqBuffer = new int[MAX_DATA_SIZE];

    private int docBufferUpto;

    private Lucene50SkipReader skipper;
    private boolean skipped;

    final IndexInput startDocIn;

    IndexInput docIn;
    final boolean indexHasFreq;
    final boolean indexHasPos;
    final boolean indexHasOffsets;
    final boolean indexHasPayloads;

    private int docFreq;                              // number of docs in this posting list
    private long totalTermFreq;                       // sum of freqs in this posting list (or docFreq when omitted)
    private int docUpto;                              // how many docs we've read
    private int doc;                                  // doc we last read
    private int accum;                                // accumulator for doc deltas
    private int freq;                                 // freq we last read

    // Where this term's postings start in the .doc file:
    private long docTermStartFP;

    // Where this term's skip data starts (after
    // docTermStartFP) in the .doc file (or -1 if there is
    // no skip data for this term):
    private long skipOffset;

    // docID for next skip point, we won't use skipper if
    // target docID is not larger than this
    private int nextSkipDoc;

    private boolean needsFreq; // true if the caller actually needs frequencies
    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

    public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
      this.startDocIn = Lucene50PostingsReader.this.docIn;
      this.docIn = null;
      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      indexHasPayloads = fieldInfo.hasPayloads();
      encoded = new byte[MAX_ENCODED_SIZE];
    }

    public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
      return docIn == startDocIn &&
        indexHasFreq == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) &&
        indexHasPos == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) &&
        indexHasPayloads == fieldInfo.hasPayloads();
    }
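    // (Re)initializes this enum for one term. A term that occurs in a single
    // document is pulsed into the term dictionary (singletonDocID), so no
    // .doc seek is needed for it at all.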
    public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
      docFreq = termState.docFreq;
      totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
      docTermStartFP = termState.docStartFP;
      skipOffset = termState.skipOffset;
      singletonDocID = termState.singletonDocID;
      if (docFreq > 1) {
        if (docIn == null) {
          // lazy init
          docIn = startDocIn.clone();
        }
        docIn.seek(docTermStartFP);
      }

      doc = -1;
      this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
      if (indexHasFreq == false || needsFreq == false) {
        Arrays.fill(freqBuffer, 1);
      }
      accum = 0;
      docUpto = 0;
      nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
      docBufferUpto = BLOCK_SIZE;
      skipped = false;
      return this;
    }

    @Override
    public int freq() throws IOException {
      return freq;
    }

    @Override
    public int nextPosition() throws IOException {
      return -1;
    }

    @Override
    public int startOffset() throws IOException {
      return -1;
    }

    @Override
    public int endOffset() throws IOException {
      return -1;
    }

    @Override
    public BytesRef getPayload() throws IOException {
      return null;
    }

    @Override
    public int docID() {
      return doc;
    }

    private void refillDocs() throws IOException {
      final int left = docFreq - docUpto;
      assert left > 0;

      if (left >= BLOCK_SIZE) {
        forUtil.readBlock(docIn, encoded, docDeltaBuffer);

        if (indexHasFreq) {
          if (needsFreq) {
            forUtil.readBlock(docIn, encoded, freqBuffer);
          } else {
            forUtil.skipBlock(docIn); // skip over freqs
          }
        }
      } else if (docFreq == 1) {
        docDeltaBuffer[0] = singletonDocID;
        freqBuffer[0] = (int) totalTermFreq;
      } else {
        // Read vInts:
        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
      }
      docBufferUpto = 0;
    }

    @Override
    public int nextDoc() throws IOException {
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      accum += docDeltaBuffer[docBufferUpto];
      docUpto++;

      doc = accum;
      freq = freqBuffer[docBufferUpto];
      docBufferUpto++;
      return doc;
    }
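    // advance() consults the skip list only when the target lies beyond the
    // next skip entry's docID; a target inside the current block is found by
    // scanning the buffered deltas linearly.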
    @Override
    public int advance(int target) throws IOException {
      // TODO: make frq block load lazy/skippable

      // current skip docID < docIDs generated from current buffer <= next skip docID
      // we don't need to skip if target is buffered already
      if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {

        if (skipper == null) {
          // Lazy init: first time this enum has ever been used for skipping
          skipper = new Lucene50SkipReader(docIn.clone(),
                                           MAX_SKIP_LEVELS,
                                           indexHasPos,
                                           indexHasOffsets,
                                           indexHasPayloads);
        }

        if (!skipped) {
          assert skipOffset != -1;
          // This is the first time this enum has skipped
          // since reset() was called; load the skip data:
          skipper.init(docTermStartFP+skipOffset, docTermStartFP, 0, 0, docFreq);
          skipped = true;
        }

        // Always add one to fix the result, since the skip position in
        // Lucene50SkipReader is a little different from MultiLevelSkipListReader
        final int newDocUpto = skipper.skipTo(target) + 1;

        if (newDocUpto > docUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();               // actually, this is just lastSkipEntry
          docIn.seek(skipper.getDocPointer());    // now point to the block we want to search
        }
        // next time we call advance, this is used to
        // foresee whether skipper is necessary.
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      // Now scan... this is an inlined/pared down version
      // of nextDoc():
      while (true) {
        accum += docDeltaBuffer[docBufferUpto];
        docUpto++;

        if (accum >= target) {
          break;
        }
        docBufferUpto++;
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
      }

      freq = freqBuffer[docBufferUpto];
      docBufferUpto++;
      return doc = accum;
    }

    @Override
    public long cost() {
      return docFreq;
    }
  }

  final class BlockPostingsEnum extends PostingsEnum {
    private final byte[] encoded;

    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];

    private int docBufferUpto;
    private int posBufferUpto;

    private Lucene50SkipReader skipper;
    private boolean skipped;

    final IndexInput startDocIn;

    IndexInput docIn;
    final IndexInput posIn;

    final boolean indexHasOffsets;
    final boolean indexHasPayloads;

    private int docFreq;                              // number of docs in this posting list
    private long totalTermFreq;                       // number of positions in this posting list
    private int docUpto;                              // how many docs we've read
    private int doc;                                  // doc we last read
    private int accum;                                // accumulator for doc deltas
    private int freq;                                 // freq we last read
    private int position;                             // current position

    // how many positions "behind" we are; nextPosition must
    // skip these to "catch up":
    private int posPendingCount;

    // Lazy pos seek: if != -1 then we must seek to this FP
    // before reading positions:
    private long posPendingFP;

    // Where this term's postings start in the .doc file:
    private long docTermStartFP;

    // Where this term's postings start in the .pos file:
    private long posTermStartFP;

    // Where this term's payloads/offsets start in the .pay
    // file:
    private long payTermStartFP;

    // File pointer where the last (vInt encoded) pos delta
    // block is.  We need this to know whether to bulk
    // decode vs vInt decode the block:
    private long lastPosBlockFP;

    // Where this term's skip data starts (after
    // docTermStartFP) in the .doc file (or -1 if there is
    // no skip data for this term):
    private long skipOffset;

    private int nextSkipDoc;

    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

    public BlockPostingsEnum(FieldInfo fieldInfo) throws IOException {
      this.startDocIn = Lucene50PostingsReader.this.docIn;
      this.docIn = null;
      this.posIn = Lucene50PostingsReader.this.posIn.clone();
      encoded = new byte[MAX_ENCODED_SIZE];
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      indexHasPayloads = fieldInfo.hasPayloads();
    }

    public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
      return docIn == startDocIn &&
        indexHasOffsets == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) &&
        indexHasPayloads == fieldInfo.hasPayloads();
    }
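    // (Re)initializes this enum for one term. Position data is not read here;
    // the first nextPosition() call seeks the .pos stream lazily via
    // posPendingFP.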
    public PostingsEnum reset(IntBlockTermState termState) throws IOException {
      docFreq = termState.docFreq;
      docTermStartFP = termState.docStartFP;
      posTermStartFP = termState.posStartFP;
      payTermStartFP = termState.payStartFP;
      skipOffset = termState.skipOffset;
      totalTermFreq = termState.totalTermFreq;
      singletonDocID = termState.singletonDocID;
      if (docFreq > 1) {
        if (docIn == null) {
          // lazy init
          docIn = startDocIn.clone();
        }
        docIn.seek(docTermStartFP);
      }
      posPendingFP = posTermStartFP;
      posPendingCount = 0;
      if (termState.totalTermFreq < BLOCK_SIZE) {
        lastPosBlockFP = posTermStartFP;
      } else if (termState.totalTermFreq == BLOCK_SIZE) {
        lastPosBlockFP = -1;
      } else {
        lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
      }

      doc = -1;
      accum = 0;
      docUpto = 0;
      if (docFreq > BLOCK_SIZE) {
        nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
      } else {
        nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
      }
      docBufferUpto = BLOCK_SIZE;
      skipped = false;
      return this;
    }

    @Override
    public int freq() throws IOException {
      return freq;
    }

    @Override
    public int docID() {
      return doc;
    }

    private void refillDocs() throws IOException {
      final int left = docFreq - docUpto;
      assert left > 0;

      if (left >= BLOCK_SIZE) {
        forUtil.readBlock(docIn, encoded, docDeltaBuffer);
        forUtil.readBlock(docIn, encoded, freqBuffer);
      } else if (docFreq == 1) {
        docDeltaBuffer[0] = singletonDocID;
        freqBuffer[0] = (int) totalTermFreq;
      } else {
        // Read vInts:
        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
      }
      docBufferUpto = 0;
    }

    private void refillPositions() throws IOException {
      if (posIn.getFilePointer() == lastPosBlockFP) {
        final int count = (int) (totalTermFreq % BLOCK_SIZE);
        int payloadLength = 0;
        for(int i=0;i<count;i++) {
          int code = posIn.readVInt();
          if (indexHasPayloads) {
            if ((code & 1) != 0) {
              payloadLength = posIn.readVInt();
            }
            posDeltaBuffer[i] = code >>> 1;
            if (payloadLength != 0) {
              posIn.seek(posIn.getFilePointer() + payloadLength);
            }
          } else {
            posDeltaBuffer[i] = code;
          }
          if (indexHasOffsets) {
            if ((posIn.readVInt() & 1) != 0) {
              // offset length changed
              posIn.readVInt();
            }
          }
        }
      } else {
        forUtil.readBlock(posIn, encoded, posDeltaBuffer);
      }
    }

    @Override
    public int nextDoc() throws IOException {
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      accum += docDeltaBuffer[docBufferUpto];
      freq = freqBuffer[docBufferUpto];
      posPendingCount += freq;
      docBufferUpto++;
      docUpto++;

      doc = accum;
      position = 0;
      return doc;
    }

    @Override
    public int advance(int target) throws IOException {
      // TODO: make frq block load lazy/skippable

      if (target > nextSkipDoc) {
        if (skipper == null) {
          // Lazy init: first time this enum has ever been used for skipping
          skipper = new Lucene50SkipReader(docIn.clone(),
                                           MAX_SKIP_LEVELS,
                                           true,
                                           indexHasOffsets,
                                           indexHasPayloads);
        }

        if (!skipped) {
          assert skipOffset != -1;
          // This is the first time this enum has skipped
          // since reset() was called; load the skip data:
          skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
          skipped = true;
        }

        final int newDocUpto = skipper.skipTo(target) + 1;

        if (newDocUpto > docUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();
          docIn.seek(skipper.getDocPointer());
          posPendingFP = skipper.getPosPointer();
          posPendingCount = skipper.getPosBufferUpto();
        }
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      // Now scan... this is an inlined/pared down version
      // of nextDoc():
      while (true) {
        accum += docDeltaBuffer[docBufferUpto];
        freq = freqBuffer[docBufferUpto];
        posPendingCount += freq;
        docBufferUpto++;
        docUpto++;

        if (accum >= target) {
          break;
        }
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
      }

      position = 0;
      return doc = accum;
    }
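    // posPendingCount accumulates the freqs of every doc stepped over since
    // the last position read; whatever exceeds the current doc's freq belongs
    // to skipped docs and is discarded by skipPositions() below.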
    // TODO: in theory we could avoid loading frq block
    // when not needed, ie, use skip data to load how far to
    // seek the pos pointer ... instead of having to load frq
    // blocks only to sum up how many positions to skip
    private void skipPositions() throws IOException {
      // Skip positions now:
      int toSkip = posPendingCount - freq;

      final int leftInBlock = BLOCK_SIZE - posBufferUpto;
      if (toSkip < leftInBlock) {
        posBufferUpto += toSkip;
      } else {
        toSkip -= leftInBlock;
        while(toSkip >= BLOCK_SIZE) {
          assert posIn.getFilePointer() != lastPosBlockFP;
          forUtil.skipBlock(posIn);
          toSkip -= BLOCK_SIZE;
        }
        refillPositions();
        posBufferUpto = toSkip;
      }

      position = 0;
    }

    @Override
    public int nextPosition() throws IOException {
      assert posPendingCount > 0;

      if (posPendingFP != -1) {
        posIn.seek(posPendingFP);
        posPendingFP = -1;

        // Force buffer refill:
        posBufferUpto = BLOCK_SIZE;
      }

      if (posPendingCount > freq) {
        skipPositions();
        posPendingCount = freq;
      }

      if (posBufferUpto == BLOCK_SIZE) {
        refillPositions();
        posBufferUpto = 0;
      }
      position += posDeltaBuffer[posBufferUpto++];
      posPendingCount--;
      return position;
    }

    @Override
    public int startOffset() {
      return -1;
    }

    @Override
    public int endOffset() {
      return -1;
    }

    @Override
    public BytesRef getPayload() {
      return null;
    }

    @Override
    public long cost() {
      return docFreq;
    }
  }

  // Also handles payloads + offsets
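  // Reads .doc, .pos and .pay in lock step: offsets and payloads of full
  // blocks are bit-packed in the .pay file, while the final partial block
  // inlines them in the .pos vInt stream.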
  final class EverythingEnum extends PostingsEnum {
    private final byte[] encoded;

    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];

    private final int[] payloadLengthBuffer;
    private final int[] offsetStartDeltaBuffer;
    private final int[] offsetLengthBuffer;
    private byte[] payloadBytes;
    private int payloadByteUpto;
    private int payloadLength;

    private int lastStartOffset;
    private int startOffset;
    private int endOffset;

    private int docBufferUpto;
    private int posBufferUpto;

    private Lucene50SkipReader skipper;
    private boolean skipped;

    final IndexInput startDocIn;

    IndexInput docIn;
    final IndexInput posIn;
    final IndexInput payIn;
    final BytesRef payload;

    final boolean indexHasOffsets;
    final boolean indexHasPayloads;

    private int docFreq;                              // number of docs in this posting list
    private long totalTermFreq;                       // number of positions in this posting list
    private int docUpto;                              // how many docs we've read
    private int doc;                                  // doc we last read
    private int accum;                                // accumulator for doc deltas
    private int freq;                                 // freq we last read
    private int position;                             // current position

    // how many positions "behind" we are; nextPosition must
    // skip these to "catch up":
    private int posPendingCount;

    // Lazy pos seek: if != -1 then we must seek to this FP
    // before reading positions:
    private long posPendingFP;

    // Lazy pay seek: if != -1 then we must seek to this FP
    // before reading payloads/offsets:
    private long payPendingFP;

    // Where this term's postings start in the .doc file:
    private long docTermStartFP;

    // Where this term's postings start in the .pos file:
    private long posTermStartFP;

    // Where this term's payloads/offsets start in the .pay
    // file:
    private long payTermStartFP;

    // File pointer where the last (vInt encoded) pos delta
    // block is.  We need this to know whether to bulk
    // decode vs vInt decode the block:
    private long lastPosBlockFP;

    // Where this term's skip data starts (after
    // docTermStartFP) in the .doc file (or -1 if there is
    // no skip data for this term):
    private long skipOffset;

    private int nextSkipDoc;

    private boolean needsOffsets; // true if we actually need offsets
    private boolean needsPayloads; // true if we actually need payloads

    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

    public EverythingEnum(FieldInfo fieldInfo) throws IOException {
      this.startDocIn = Lucene50PostingsReader.this.docIn;
      this.docIn = null;
      this.posIn = Lucene50PostingsReader.this.posIn.clone();
      this.payIn = Lucene50PostingsReader.this.payIn.clone();
      encoded = new byte[MAX_ENCODED_SIZE];
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      if (indexHasOffsets) {
        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
        offsetLengthBuffer = new int[MAX_DATA_SIZE];
      } else {
        offsetStartDeltaBuffer = null;
        offsetLengthBuffer = null;
        startOffset = -1;
        endOffset = -1;
      }

      indexHasPayloads = fieldInfo.hasPayloads();
      if (indexHasPayloads) {
        payloadLengthBuffer = new int[MAX_DATA_SIZE];
        payloadBytes = new byte[128];
        payload = new BytesRef();
      } else {
        payloadLengthBuffer = null;
        payloadBytes = null;
        payload = null;
      }
    }

    public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
      return docIn == startDocIn &&
        indexHasOffsets == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) &&
        indexHasPayloads == fieldInfo.hasPayloads();
    }

    public EverythingEnum reset(IntBlockTermState termState, int flags) throws IOException {
      docFreq = termState.docFreq;
      docTermStartFP = termState.docStartFP;
      posTermStartFP = termState.posStartFP;
      payTermStartFP = termState.payStartFP;
      skipOffset = termState.skipOffset;
      totalTermFreq = termState.totalTermFreq;
      singletonDocID = termState.singletonDocID;
      if (docFreq > 1) {
        if (docIn == null) {
          // lazy init
          docIn = startDocIn.clone();
        }
        docIn.seek(docTermStartFP);
      }
      posPendingFP = posTermStartFP;
      payPendingFP = payTermStartFP;
      posPendingCount = 0;
      if (termState.totalTermFreq < BLOCK_SIZE) {
        lastPosBlockFP = posTermStartFP;
      } else if (termState.totalTermFreq == BLOCK_SIZE) {
        lastPosBlockFP = -1;
      } else {
        lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
      }

      this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
      this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);

      doc = -1;
      accum = 0;
      docUpto = 0;
      if (docFreq > BLOCK_SIZE) {
        nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
      } else {
        nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
      }
      docBufferUpto = BLOCK_SIZE;
      skipped = false;
      return this;
    }

    @Override
    public int freq() throws IOException {
      return freq;
    }

    @Override
    public int docID() {
      return doc;
    }

    private void refillDocs() throws IOException {
      final int left = docFreq - docUpto;
      assert left > 0;

      if (left >= BLOCK_SIZE) {
        forUtil.readBlock(docIn, encoded, docDeltaBuffer);
        forUtil.readBlock(docIn, encoded, freqBuffer);
      } else if (docFreq == 1) {
        docDeltaBuffer[0] = singletonDocID;
        freqBuffer[0] = (int) totalTermFreq;
      } else {
        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
      }
      docBufferUpto = 0;
    }
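    // The tail block (fewer than BLOCK_SIZE positions) is vInt-encoded with
    // payload bytes and offset deltas inlined in .pos; full blocks are
    // bit-packed, and the offset/payload blocks in .pay can be skipped
    // wholesale when the caller did not request them.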
    private void refillPositions() throws IOException {
      if (posIn.getFilePointer() == lastPosBlockFP) {
        final int count = (int) (totalTermFreq % BLOCK_SIZE);
        int payloadLength = 0;
        int offsetLength = 0;
        payloadByteUpto = 0;
        for(int i=0;i<count;i++) {
          int code = posIn.readVInt();
          if (indexHasPayloads) {
            if ((code & 1) != 0) {
              payloadLength = posIn.readVInt();
            }
            payloadLengthBuffer[i] = payloadLength;
            posDeltaBuffer[i] = code >>> 1;
            if (payloadLength != 0) {
              if (payloadByteUpto + payloadLength > payloadBytes.length) {
                payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
              }
              posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
              payloadByteUpto += payloadLength;
            }
          } else {
            posDeltaBuffer[i] = code;
          }

          if (indexHasOffsets) {
            int deltaCode = posIn.readVInt();
            if ((deltaCode & 1) != 0) {
              offsetLength = posIn.readVInt();
            }
            offsetStartDeltaBuffer[i] = deltaCode >>> 1;
            offsetLengthBuffer[i] = offsetLength;
          }
        }
        payloadByteUpto = 0;
      } else {
        forUtil.readBlock(posIn, encoded, posDeltaBuffer);

        if (indexHasPayloads) {
          if (needsPayloads) {
            forUtil.readBlock(payIn, encoded, payloadLengthBuffer);
            int numBytes = payIn.readVInt();

            if (numBytes > payloadBytes.length) {
              payloadBytes = ArrayUtil.grow(payloadBytes, numBytes);
            }
            payIn.readBytes(payloadBytes, 0, numBytes);
          } else {
            // this works, because when writing a vint block we always force the first length to be written
            forUtil.skipBlock(payIn); // skip over lengths
            int numBytes = payIn.readVInt(); // read length of payloadBytes
            payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
          }
          payloadByteUpto = 0;
        }

        if (indexHasOffsets) {
          if (needsOffsets) {
            forUtil.readBlock(payIn, encoded, offsetStartDeltaBuffer);
            forUtil.readBlock(payIn, encoded, offsetLengthBuffer);
          } else {
            // this works, because when writing a vint block we always force the first length to be written
            forUtil.skipBlock(payIn); // skip over starts
            forUtil.skipBlock(payIn); // skip over lengths
          }
        }
      }
    }

    @Override
    public int nextDoc() throws IOException {
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      accum += docDeltaBuffer[docBufferUpto];
      freq = freqBuffer[docBufferUpto];
      posPendingCount += freq;
      docBufferUpto++;
      docUpto++;

      doc = accum;
      position = 0;
      lastStartOffset = 0;
      return doc;
    }
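    // Skipping must also restore the payload cursor: the skip data records
    // payloadByteUpto so that payload bytes stay aligned with positions after
    // a jump.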
    @Override
    public int advance(int target) throws IOException {
      // TODO: make frq block load lazy/skippable

      if (target > nextSkipDoc) {
        if (skipper == null) {
          // Lazy init: first time this enum has ever been used for skipping
          skipper = new Lucene50SkipReader(docIn.clone(),
                                           MAX_SKIP_LEVELS,
                                           true,
                                           indexHasOffsets,
                                           indexHasPayloads);
        }

        if (!skipped) {
          assert skipOffset != -1;
          // This is the first time this enum has skipped
          // since reset() was called; load the skip data:
          skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
          skipped = true;
        }

        final int newDocUpto = skipper.skipTo(target) + 1;

        if (newDocUpto > docUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();
          docIn.seek(skipper.getDocPointer());
          posPendingFP = skipper.getPosPointer();
          payPendingFP = skipper.getPayPointer();
          posPendingCount = skipper.getPosBufferUpto();
          lastStartOffset = 0; // new document
          payloadByteUpto = skipper.getPayloadByteUpto();
        }
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      // Now scan:
      while (true) {
        accum += docDeltaBuffer[docBufferUpto];
        freq = freqBuffer[docBufferUpto];
        posPendingCount += freq;
        docBufferUpto++;
        docUpto++;

        if (accum >= target) {
          break;
        }
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
      }

      position = 0;
      lastStartOffset = 0;
      return doc = accum;
    }

    // TODO: in theory we could avoid loading frq block
    // when not needed, ie, use skip data to load how far to
    // seek the pos pointer ... instead of having to load frq
    // blocks only to sum up how many positions to skip
    private void skipPositions() throws IOException {
      // Skip positions now:
      int toSkip = posPendingCount - freq;
      // if (DEBUG) {
      //   System.out.println("      FPR.skipPositions: toSkip=" + toSkip);
      // }

      final int leftInBlock = BLOCK_SIZE - posBufferUpto;
      if (toSkip < leftInBlock) {
        int end = posBufferUpto + toSkip;
        while(posBufferUpto < end) {
          if (indexHasPayloads) {
            payloadByteUpto += payloadLengthBuffer[posBufferUpto];
          }
          posBufferUpto++;
        }
      } else {
        toSkip -= leftInBlock;
        while(toSkip >= BLOCK_SIZE) {
          assert posIn.getFilePointer() != lastPosBlockFP;
          forUtil.skipBlock(posIn);

          if (indexHasPayloads) {
            // Skip payloadLength block:
            forUtil.skipBlock(payIn);

            // Skip payloadBytes block:
            int numBytes = payIn.readVInt();
            payIn.seek(payIn.getFilePointer() + numBytes);
          }

          if (indexHasOffsets) {
            forUtil.skipBlock(payIn);
            forUtil.skipBlock(payIn);
          }
          toSkip -= BLOCK_SIZE;
        }
        refillPositions();
        payloadByteUpto = 0;
        posBufferUpto = 0;
        while(posBufferUpto < toSkip) {
          if (indexHasPayloads) {
            payloadByteUpto += payloadLengthBuffer[posBufferUpto];
          }
          posBufferUpto++;
        }
      }

      position = 0;
      lastStartOffset = 0;
    }

    @Override
    public int nextPosition() throws IOException {
      assert posPendingCount > 0;

      if (posPendingFP != -1) {
        posIn.seek(posPendingFP);
        posPendingFP = -1;

        if (payPendingFP != -1) {
          payIn.seek(payPendingFP);
          payPendingFP = -1;
        }

        // Force buffer refill:
        posBufferUpto = BLOCK_SIZE;
      }

      if (posPendingCount > freq) {
        skipPositions();
        posPendingCount = freq;
      }

      if (posBufferUpto == BLOCK_SIZE) {
        refillPositions();
        posBufferUpto = 0;
      }
      position += posDeltaBuffer[posBufferUpto];

      if (indexHasPayloads) {
        payloadLength = payloadLengthBuffer[posBufferUpto];
        payload.bytes = payloadBytes;
        payload.offset = payloadByteUpto;
        payload.length = payloadLength;
        payloadByteUpto += payloadLength;
      }

      if (indexHasOffsets) {
        startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto];
        endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
        lastStartOffset = startOffset;
      }

      posBufferUpto++;
      posPendingCount--;
      return position;
    }

    @Override
    public int startOffset() {
      return startOffset;
    }

    @Override
    public int endOffset() {
      return endOffset;
    }

    @Override
    public BytesRef getPayload() {
      if (payloadLength == 0) {
        return null;
      } else {
        return payload;
      }
    }

    @Override
    public long cost() {
      return docFreq;
    }
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED;
  }

  @Override
  public void checkIntegrity() throws IOException {
    if (docIn != null) {
      CodecUtil.checksumEntireFile(docIn);
    }
    if (posIn != null) {
      CodecUtil.checksumEntireFile(posIn);
    }
    if (payIn != null) {
      CodecUtil.checksumEntireFile(payIn);
    }
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(positions=" + (posIn != null) + ",payloads=" + (payIn != null) + ")";
  }
}