EliasFanoDecoder.java example

Explorer
heliosearch-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.util.packed;

import org.apache.lucene.util.BroadWord; // bit selection in long


/** A decoder for an {@link EliasFanoEncoder}.
 * <p>
 * The decoder keeps a decoding index and the position of the set bit in the encoder's
 * upper bits that belongs to that index. The high part of a value is recovered as
 * (set bit position - decoding index), the low part is unpacked from the encoder's lower bits.
 * @lucene.internal
 */
public class EliasFanoDecoder {
  /** Base 2 logarithm of {@link Long#SIZE}, used to turn a bit position into a long index. */
  private static final int LOG2_LONG_SIZE = Long.numberOfTrailingZeros(Long.SIZE);

  private final EliasFanoEncoder efEncoder;
  private final long numEncoded;
  private long efIndex = -1; // the decoding index.
  private long setBitForIndex = -1; // the index of the high bit at the decoding index.

  /** Returned by the decoding methods when no (further) value is available. */
  public static final long NO_MORE_VALUES = -1L;

  private final long numIndexEntries;
  private final long indexMask;

  /** Construct a decoder for a given {@link EliasFanoEncoder}.
   * The decoding index is set to just before the first encoded value.
   * <br>The number of encoded values and the number of index entries are copied
   * from the encoder at construction time.
   * @param efEncoder The encoder whose encoded values are to be decoded.
   */
  public EliasFanoDecoder(EliasFanoEncoder efEncoder) {
    this.efEncoder = efEncoder;
    this.numEncoded = efEncoder.numEncoded; // not final in EliasFanoEncoder
    this.numIndexEntries = efEncoder.currentEntryIndex;  // not final in EliasFanoEncoder
    this.indexMask = (1L << efEncoder.nIndexEntryBits) - 1;
  }

  /** @return The Elias-Fano encoder that is decoded. */
  public EliasFanoEncoder getEliasFanoEncoder() {
    return efEncoder;
  }

  /** The number of values encoded by the encoder.
   * @return The number of values encoded by the encoder.
   */
  public long numEncoded() {
    return numEncoded;
  }


  /** The current decoding index.
   * The first value encoded by {@link EliasFanoEncoder#encodeNext} has index 0.
   * Only valid directly after
   * {@link #nextValue}, {@link #advanceToValue},
   * {@link #previousValue}, or {@link #backToValue}
   * returned another value than {@link #NO_MORE_VALUES},
   * or {@link #advanceToIndex} returned true.
   * @return The decoding index of the last decoded value, or as last set by {@link #advanceToIndex}.
   * @throws IllegalStateException when the decoding index is before or after the sequence.
   */
  public long currentIndex() {
    if (efIndex < 0) {
      throw new IllegalStateException("index before sequence");
    }
    if (efIndex >= numEncoded) {
      throw new IllegalStateException("index after sequence");
    }
    return efIndex;
  }

  /** The value at the current decoding index.
   * Only valid when {@link #currentIndex} would return a valid result.
   * <br>This is only intended for use after {@link #advanceToIndex} returned true.
   * @return The value encoded at {@link #currentIndex}.
   */
  public long currentValue() {
    return combineHighLowValues(currentHighValue(), currentLowValue());
  }

  /**  @return The high value for the current decoding index. */
  private long currentHighValue() {
    return setBitForIndex - efIndex; // sequence of unary gaps
  }

  /** Extract a value of <code>numBits</code> bits from a packed array of longs.
   * See also {@link EliasFanoEncoder#packValue}
   * @param longArray The array containing the packed values.
   * @param numBits The number of bits of each packed value, 0 results in value 0.
   * @param packIndex The index of the packed value to extract.
   * @param bitsMask The mask that is applied to the extracted bits.
   * @return The value at the given pack index.
   */
  private static long unPackValue(long[] longArray, int numBits, long packIndex, long bitsMask) {
    if (numBits == 0) {
      return 0;
    }
    long bitPos = packIndex * numBits;
    int index = (int) (bitPos >>> LOG2_LONG_SIZE);
    int bitPosAtIndex = (int) (bitPos & (Long.SIZE-1));
    long value = longArray[index] >>> bitPosAtIndex;
    if ((bitPosAtIndex + numBits) > Long.SIZE) {
      // the value continues in the next long, add its remaining bits.
      value |= (longArray[index + 1] << (Long.SIZE - bitPosAtIndex));
    }
    value &= bitsMask;
    return value;
  }

  /**  @return The low value for the current decoding index. */
  private long currentLowValue() {
    assert ((efIndex >= 0) && (efIndex < numEncoded)) : "efIndex " + efIndex;
    return unPackValue(efEncoder.lowerLongs, efEncoder.numLowBits, efIndex, efEncoder.lowerBitsMask);
  }

  /**  @return The given highValue shifted left by the number of low bits of the encoder,
   *           logically OR-ed with the given lowValue.
   */
  private long combineHighLowValues(long highValue, long lowValue) {
    return (highValue << efEncoder.numLowBits) | lowValue;
  }

  /** The upper bits that still have to be scanned in the current direction,
   * taken from the long in the encoder's upper bits that contains setBitForIndex.
   */
  private long curHighLong;


  /* The implementation of forward decoding and backward decoding is done by the following method pairs.
   *
   * toBeforeSequence - toAfterSequence
   * getCurrentRightShift - getCurrentLeftShift
   * toAfterCurrentHighBit - toBeforeCurrentHighBit
   * toNextHighLong - toPreviousHighLong
   * nextHighValue - previousHighValue
   * nextValue - previousValue
   * advanceToValue - backToValue
   *
   */

  /* Forward decoding section */


  /** Set the decoding index to just before the first encoded value.
   */
  public void toBeforeSequence() {
    efIndex = -1;
    setBitForIndex = -1;
  }

  /** @return the bit position of setBitForIndex within its long, i.e. (setBitForIndex modulo Long.SIZE),
   *          which is the right shift that removes the bits before setBitForIndex.
   */
  private int getCurrentRightShift() {
    return (int) (setBitForIndex & (Long.SIZE-1));
  }

  /** Increment efIndex and setBitForIndex and
   * shift curHighLong so that it does not contain the high bits before setBitForIndex.
   * <br>When false is returned only efIndex has been incremented.
   * @return true iff efIndex still smaller than numEncoded.
   */
  private boolean toAfterCurrentHighBit() {
    efIndex += 1;
    if (efIndex >= numEncoded) {
      return false;
    }
    setBitForIndex += 1;
    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
    curHighLong = efEncoder.upperLongs[highIndex] >>> getCurrentRightShift();
    return true;
  }

  /** The current high long has been determined to not contain the set bit that is needed.
   *  Increment setBitForIndex to the next high long and set curHighLong accordingly.
   */
  private void toNextHighLong() {
    setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
    //assert getCurrentRightShift() == 0;
    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
    curHighLong = efEncoder.upperLongs[highIndex];
  }

  /** setBitForIndex and efIndex have just been incremented, scan to the next high set bit
   *  by incrementing setBitForIndex, and by setting curHighLong accordingly.
   */
  private void toNextHighValue() {
    while (curHighLong == 0L) {
      toNextHighLong(); // inlining and unrolling would simplify somewhat
    }
    setBitForIndex += Long.numberOfTrailingZeros(curHighLong);
  }

  /** setBitForIndex and efIndex have just been incremented, scan to the next high set bit
   *  by incrementing setBitForIndex, and by setting curHighLong accordingly.
   *  @return the next encoded high value.
   */
  private long nextHighValue() {
    toNextHighValue();
    return currentHighValue();
  }

  /** If another value is available after the current decoding index, return this value
   * and increase the decoding index by 1. Otherwise return {@link #NO_MORE_VALUES}.
   */
  public long nextValue() {
    if (! toAfterCurrentHighBit()) {
      return NO_MORE_VALUES;
    }
    long highValue = nextHighValue();
    return combineHighLowValues(highValue, currentLowValue());
  }

  /** Advance the decoding index to a given index
   * and return <code>true</code> iff it is available.
   * When <code>false</code> is returned the decoding index is set to just after the last encoded value.
   * <br>See also {@link #currentValue}.
   * <br>The current implementation does not use the index on the upper bit zero bit positions.
   * <br>Note: there is currently no implementation of <code>backToIndex</code>.
   * @param index The index to advance to, this should be bigger than the current decoding index.
   */
  public boolean advanceToIndex(long index) {
    assert index > efIndex;
    if (index >= numEncoded) {
      efIndex = numEncoded;
      return false;
    }
    if (! toAfterCurrentHighBit()) {
      assert false; // efIndex + 1 <= index < numEncoded
    }
    // Here the set bit for efIndex is at or after setBitForIndex,
    // and curHighLong contains the high bits from setBitForIndex in the current high long.
    /* CHECKME: Add a (binary) search in the upperZeroBitPositions here. */
    int curSetBits = Long.bitCount(curHighLong);
    while ((efIndex + curSetBits) <= index) { // curHighLong does not contain the set bit for index
      efIndex += curSetBits; // skip the values that have their set bit in curHighLong
      toNextHighLong();
      curSetBits = Long.bitCount(curHighLong);
    }
    // curHighLong contains the set bit for index, so it has at least one set bit.
    // Move setBitForIndex to the set bit for efIndex, as nextValue() does.
    toNextHighValue();
    while (efIndex < index) {
      /* CHECKME: Instead of the linear search here, use (forward) broadword selection from
       * "Broadword Implementation of Rank/Select Queries", Sebastiano Vigna, January 30, 2012.
       */
      if (! toAfterCurrentHighBit()) {
        assert false; // efIndex < index < numEncoded
      }
      toNextHighValue();
    }
    return true;
  }



  /** Given a target value, advance the decoding index to the first bigger or equal value
   * and return it if it is available. Otherwise return {@link #NO_MORE_VALUES}.
   * <br>The current implementation uses the index on the upper zero bit positions.
   * @param target The value to advance to.
   */
  public long advanceToValue(long target) {
    efIndex += 1;
    if (efIndex >= numEncoded) {
      return NO_MORE_VALUES;
    }
    setBitForIndex += 1; // the high bit at setBitForIndex belongs to the unary code for efIndex

    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
    long upperLong = efEncoder.upperLongs[highIndex];
    curHighLong = upperLong >>> ((int) (setBitForIndex & (Long.SIZE-1))); // may contain the unary 1 bit for efIndex

    // determine index entry to advance to
    long highTarget = target >>> efEncoder.numLowBits;

    long indexEntryIndex = (highTarget / efEncoder.indexInterval) - 1;
    if (indexEntryIndex >= 0) { // not before first index entry
      if (indexEntryIndex >= numIndexEntries) {
        indexEntryIndex = numIndexEntries - 1; // no further than last index entry
      }
      long indexHighValue = (indexEntryIndex + 1) * efEncoder.indexInterval;
      assert indexHighValue <= highTarget;
      if (indexHighValue > (setBitForIndex - efIndex)) { // advance to just after zero bit position of index entry.
        setBitForIndex = unPackValue(efEncoder.upperZeroBitPositionIndex, efEncoder.nIndexEntryBits, indexEntryIndex, indexMask);
        efIndex = setBitForIndex - indexHighValue; // the high bit at setBitForIndex belongs to the unary code for efIndex
        highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
        upperLong = efEncoder.upperLongs[highIndex];
        curHighLong = upperLong >>> ((int) (setBitForIndex & (Long.SIZE-1))); // may contain the unary 1 bit for efIndex
      }
      assert efIndex < numEncoded; // there is a high value to be found.
    }

    int curSetBits = Long.bitCount(curHighLong); // shifted right.
    int curClearBits = Long.SIZE - curSetBits - ((int) (setBitForIndex & (Long.SIZE-1))); // subtract right shift, may be more than encoded

    // Skip complete high longs as long as they do not have enough clear bits to reach highTarget.
    while (((setBitForIndex - efIndex) + curClearBits) < highTarget) {
      // curHighLong has not enough clear bits to reach highTarget
      efIndex += curSetBits;
      if (efIndex >= numEncoded) {
        return NO_MORE_VALUES;
      }
      setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
      // highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
      assert (highIndex + 1) == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
      highIndex += 1;
      upperLong = efEncoder.upperLongs[highIndex];
      curHighLong = upperLong;
      curSetBits = Long.bitCount(curHighLong);
      curClearBits = Long.SIZE - curSetBits;
    }
    // curHighLong has enough clear bits to reach highTarget, and may not have enough set bits.
    while (curHighLong == 0L) {
      setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
      assert (highIndex + 1) == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
      highIndex += 1;
      upperLong = efEncoder.upperLongs[highIndex];
      curHighLong = upperLong;
    }

    // curHighLong has enough clear bits to reach highTarget, has at least 1 set bit, and may not have enough set bits.
    int rank = (int) (highTarget - (setBitForIndex - efIndex)); // the rank of the zero bit for highValue.
    assert (rank <= Long.SIZE) : ("rank " + rank);
    if (rank >= 1) {
      // Select the clear bit with the given rank by selecting the set bit in the inverted long.
      long invCurHighLong = ~curHighLong;
      int clearBitForValue = (rank <= 8)
                              ? BroadWord.selectNaive(invCurHighLong, rank)
                              : BroadWord.select(invCurHighLong, rank);
      assert clearBitForValue <= (Long.SIZE-1);
      setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero
      int oneBitsBeforeClearBit = clearBitForValue - rank + 1;
      efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to the unary code for efIndex
      if (efIndex >= numEncoded) {
        return NO_MORE_VALUES;
      }

      if ((setBitForIndex & (Long.SIZE - 1)) == 0L) { // exhausted curHighLong
        assert (highIndex + 1) == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
        highIndex += 1;
        upperLong = efEncoder.upperLongs[highIndex];
        curHighLong = upperLong;
      }
      else {
        assert highIndex == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
        curHighLong = upperLong >>> ((int) (setBitForIndex & (Long.SIZE-1)));
      }
      // curHighLong has enough clear bits to reach highTarget, and may not have enough set bits.

      while (curHighLong == 0L) {
        setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
        assert (highIndex + 1) == (int)(setBitForIndex >>> LOG2_LONG_SIZE);
        highIndex += 1;
        upperLong = efEncoder.upperLongs[highIndex];
        curHighLong = upperLong;
      }
    }
    setBitForIndex += Long.numberOfTrailingZeros(curHighLong);
    assert (setBitForIndex - efIndex) >= highTarget; // highTarget reached

    // Linear search also with low values
    long currentValue = combineHighLowValues((setBitForIndex - efIndex), currentLowValue());
    while (currentValue < target) {
      currentValue = nextValue();
      if (currentValue == NO_MORE_VALUES) {
        return NO_MORE_VALUES;
      }
    }
    return currentValue;
  }


  /* Backward decoding section */

  /** Set the decoding index to just after the last encoded value.
   */
  public void toAfterSequence() {
    efIndex = numEncoded; // just after last index
    setBitForIndex = (efEncoder.lastEncoded >>> efEncoder.numLowBits) + numEncoded;
  }

  /** @return the number of bits in a long after the bit position (setBitForIndex modulo Long.SIZE),
   *          which is the left shift that removes the bits after setBitForIndex.
   */
  private int getCurrentLeftShift() {
    return Long.SIZE - 1 - (int) (setBitForIndex & (Long.SIZE-1));
  }

  /** Decrement efIndex and setBitForIndex and
   * shift curHighLong so that it does not contain the high bits after setBitForIndex.
   * <br>When false is returned only efIndex has been decremented.
   * @return true iff efIndex still >= 0
   */
  private boolean toBeforeCurrentHighBit() {
    efIndex -= 1;
    if (efIndex < 0) {
      return false;
    }
    setBitForIndex -= 1;
    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
    curHighLong = efEncoder.upperLongs[highIndex] << getCurrentLeftShift();
    return true;
  }

  /** The current high long has been determined to not contain the set bit that is needed.
   *  Decrement setBitForIndex to the previous high long and set curHighLong accordingly.
   */
  private void toPreviousHighLong() {
    setBitForIndex -= (setBitForIndex & (Long.SIZE-1)) + 1;
    //assert getCurrentLeftShift() == 0;
    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
    curHighLong = efEncoder.upperLongs[highIndex];
  }

  /** setBitForIndex and efIndex have just been decremented, scan to the previous high set bit
   *  by decrementing setBitForIndex and by setting curHighLong accordingly.
   *  @return the previous encoded high value.
   */
  private long previousHighValue() {
    while (curHighLong == 0L) {
      toPreviousHighLong(); // inlining and unrolling would simplify somewhat
    }
    setBitForIndex -= Long.numberOfLeadingZeros(curHighLong);
    return currentHighValue();
  }

  /** If another value is available before the current decoding index, return this value
   * and decrease the decoding index by 1. Otherwise return {@link #NO_MORE_VALUES}.
   */
  public long previousValue() {
    if (! toBeforeCurrentHighBit()) {
      return NO_MORE_VALUES;
    }
    long highValue = previousHighValue();
    return combineHighLowValues(highValue, currentLowValue());
  }


  /** setBitForIndex and efIndex have just been decremented, scan backward to the high set bit
   *  of at most a given high value
   *  by decrementing setBitForIndex and by setting curHighLong accordingly.
   * <br>The current implementation does not use the index on the upper zero bit positions.
   *  @param highTarget The high value to go back to.
   *  @return the largest encoded high value that is at most the given one,
   *          or {@link #NO_MORE_VALUES} when there is no such high value.
   */
  private long backToHighValue(long highTarget) {
    /* CHECKME: Add using the index as in advanceToHighValue */
    int curSetBits = Long.bitCount(curHighLong); // is shifted by getCurrentLeftShift()
    int curClearBits = Long.SIZE - curSetBits - getCurrentLeftShift();
    while ((currentHighValue() - curClearBits) > highTarget) {
      // curHighLong has not enough clear bits to reach highTarget
      efIndex -= curSetBits;
      if (efIndex < 0) {
        return NO_MORE_VALUES;
      }
      toPreviousHighLong();
      //assert getCurrentLeftShift() == 0;
      curSetBits = Long.bitCount(curHighLong);
      curClearBits = Long.SIZE - curSetBits;
    }
    // curHighLong has enough clear bits to reach highTarget, but may not have enough set bits.
    long highValue = previousHighValue();
    while (highValue > highTarget) {
      /* CHECKME: See at advanceToHighValue on using broadword bit selection. */
      if (! toBeforeCurrentHighBit()) {
        return NO_MORE_VALUES;
      }
      highValue = previousHighValue();
    }
    return highValue;
  }

  /** Given a target value, go back to the first smaller or equal value
   * and return it if it is available. Otherwise return {@link #NO_MORE_VALUES}.
   * <br>The current implementation does not use the index on the upper zero bit positions.
   * @param target The value to go back to.
   */
  public long backToValue(long target) {
    if (! toBeforeCurrentHighBit()) {
      return NO_MORE_VALUES;
    }
    long highTarget = target >>> efEncoder.numLowBits;
    long highValue = backToHighValue(highTarget);
    if (highValue == NO_MORE_VALUES) {
      return NO_MORE_VALUES;
    }
    // Linear search with low values:
    long currentValue = combineHighLowValues(highValue, currentLowValue());
    while (currentValue > target) {
      currentValue = previousValue();
      if (currentValue == NO_MORE_VALUES) {
        return NO_MORE_VALUES;
      }
    }
    return currentValue;
  }
}