/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.codecs.lucene54; import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene54.Lucene54DocValuesConsumer.NumberType; import org.apache.lucene.index.*; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RandomAccessInput; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountables; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LongValues; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.DirectMonotonicReader; import org.apache.lucene.util.packed.DirectReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; 
import static org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat.*;

/** reader for {@link Lucene54DocValuesFormat} */
final class Lucene54DocValuesProducer extends DocValuesProducer implements Closeable {
  // Per-field metadata entries keyed by field name; populated once, in the constructor,
  // from the metadata (.dvm) file and never mutated afterwards.
  private final Map<String,NumericEntry> numerics = new HashMap<>();
  private final Map<String,BinaryEntry> binaries = new HashMap<>();
  private final Map<String,SortedSetEntry> sortedSets = new HashMap<>();
  private final Map<String,SortedSetEntry> sortedNumerics = new HashMap<>();
  // ordinal streams for sorted/sortedset fields, and the per-document ord-index
  // streams used by the multi-valued variants
  private final Map<String,NumericEntry> ords = new HashMap<>();
  private final Map<String,NumericEntry> ordIndexes = new HashMap<>();
  private final int numFields;
  private final AtomicLong ramBytesUsed;
  // the data (.dvd) file; per-field readers slice/clone from this input
  private final IndexInput data;
  private final int maxDoc;

  // memory-resident structures
  private final Map<String,MonotonicBlockPackedReader> addressInstances = new HashMap<>();
  private final Map<String,ReverseTermsIndex> reverseIndexInstances = new HashMap<>();
  private final Map<String,DirectMonotonicReader.Meta> directAddressesMeta = new HashMap<>();

  private final boolean merging;

  // clone for merge: when merging we don't do any instances.put()s
  Lucene54DocValuesProducer(Lucene54DocValuesProducer original) throws IOException {
    // caller must hold the original's monitor so the cached instance maps are stable
    assert Thread.holdsLock(original);
    numerics.putAll(original.numerics);
    binaries.putAll(original.binaries);
    sortedSets.putAll(original.sortedSets);
    sortedNumerics.putAll(original.sortedNumerics);
    ords.putAll(original.ords);
    ordIndexes.putAll(original.ordIndexes);
    numFields = original.numFields;
    ramBytesUsed = new AtomicLong(original.ramBytesUsed.get());
    // clone the data input so the merge thread has its own file pointer
    data = original.data.clone();
    maxDoc = original.maxDoc;
    addressInstances.putAll(original.addressInstances);
    reverseIndexInstances.putAll(original.reverseIndexInstances);
    merging = true;
  }

  /** expert: instantiates a new reader */
  Lucene54DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
    String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
    this.maxDoc = state.segmentInfo.maxDoc();
    merging = false;
    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));

    int version = -1;
    int numFields = -1;

    // read in the entries from the metadata file.
    try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) {
      Throwable priorE = null;
      try {
        version = CodecUtil.checkIndexHeader(in, metaCodec, Lucene54DocValuesFormat.VERSION_START, Lucene54DocValuesFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
        numFields = readFields(in, state.fieldInfos);
      } catch (Throwable exception) {
        // defer: checkFooter reports the prior exception (if any) together with
        // any checksum mismatch, so corruption is not masked by a parse error
        priorE = exception;
      } finally {
        CodecUtil.checkFooter(in, priorE);
      }
    }

    this.numFields = numFields;
    String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
    this.data = state.directory.openInput(dataName, state.context);
    boolean success = false;
    try {
      final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, Lucene54DocValuesFormat.VERSION_START, Lucene54DocValuesFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      // meta and data files must have been written by the same format version
      if (version != version2) {
        throw new CorruptIndexException("Format versions mismatch: meta=" + version + ", data=" + version2, data);
      }

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(data);

      success = true;
    } finally {
      if (!success) {
        // constructor failed after opening the data input: close it to avoid a leak
        IOUtils.closeWhileHandlingException(this.data);
      }
    }
  }

  /**
   * Reads the metadata for a SORTED field: a binary entry (the term dictionary)
   * followed by a numeric entry (the per-document ordinals).
   */
  private void readSortedField(FieldInfo info, IndexInput meta) throws IOException {
    // sorted = binary + numeric
    if (meta.readVInt() != info.number) {
      throw new CorruptIndexException("sorted entry for field: " + info.name + " is corrupt", meta);
    }
    if (meta.readByte() != Lucene54DocValuesFormat.BINARY) {
      throw new CorruptIndexException("sorted entry for field: " + info.name + " is corrupt", meta);
    }
    BinaryEntry b = readBinaryEntry(info, meta);
    binaries.put(info.name, b);

    if (meta.readVInt() != info.number) {
      throw new CorruptIndexException("sorted entry for field: " + info.name + " is corrupt", meta);
    }
    if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
      throw new CorruptIndexException("sorted entry for field: " + info.name + " is corrupt", meta);
    }
    NumericEntry n = readNumericEntry(info, meta);
    ords.put(info.name, n);
  }

  /**
   * Reads the metadata for a SORTED_SET field stored with per-document address
   * ranges: term dictionary + ordinal stream + ord-index (start offsets).
   */
  private void readSortedSetFieldWithAddresses(FieldInfo info, IndexInput meta) throws IOException {
    // sortedset = binary + numeric (addresses) + ordIndex
    if (meta.readVInt() != info.number) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    if (meta.readByte() != Lucene54DocValuesFormat.BINARY) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    BinaryEntry b = readBinaryEntry(info, meta);
    binaries.put(info.name, b);

    if (meta.readVInt() != info.number) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    NumericEntry n1 = readNumericEntry(info, meta);
    ords.put(info.name, n1);

    if (meta.readVInt() != info.number) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    NumericEntry n2 = readNumericEntry(info, meta);
    ordIndexes.put(info.name, n2);
  }

  /**
   * Reads the metadata for a SORTED_SET field stored as a table of ord sets:
   * term dictionary + a single numeric stream of per-document table ids.
   */
  private void readSortedSetFieldWithTable(FieldInfo info, IndexInput meta) throws IOException {
    // sortedset table = binary + ordset table + ordset index
    if (meta.readVInt() != info.number) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    if (meta.readByte() != Lucene54DocValuesFormat.BINARY) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    BinaryEntry b = readBinaryEntry(info, meta);
    binaries.put(info.name, b);

    if (meta.readVInt() != info.number) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
      throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
    }
    NumericEntry n = readNumericEntry(info, meta);
    ords.put(info.name, n);
  }

  /**
   * Reads all per-field entries from the metadata stream until the -1 terminator,
   * populating the entry maps. Returns the number of fields read.
   */
  private int readFields(IndexInput meta, FieldInfos infos) throws IOException {
    int numFields = 0;
    int fieldNumber = meta.readVInt();
    while (fieldNumber != -1) {
      numFields++;
      FieldInfo info = infos.fieldInfo(fieldNumber);
      if (info == null) {
        // trickier to validate more: because we use multiple entries for "composite" types like sortedset, etc.
        throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
      }
      byte type = meta.readByte();
      if (type == Lucene54DocValuesFormat.NUMERIC) {
        numerics.put(info.name, readNumericEntry(info, meta));
      } else if (type == Lucene54DocValuesFormat.BINARY) {
        BinaryEntry b = readBinaryEntry(info, meta);
        binaries.put(info.name, b);
      } else if (type == Lucene54DocValuesFormat.SORTED) {
        readSortedField(info, meta);
      } else if (type == Lucene54DocValuesFormat.SORTED_SET) {
        SortedSetEntry ss = readSortedSetEntry(meta);
        sortedSets.put(info.name, ss);
        if (ss.format == SORTED_WITH_ADDRESSES) {
          readSortedSetFieldWithAddresses(info, meta);
        } else if (ss.format == SORTED_SET_TABLE) {
          readSortedSetFieldWithTable(info, meta);
        } else if (ss.format == SORTED_SINGLE_VALUED) {
          // single-valued sortedset is written as a plain SORTED field
          if (meta.readVInt() != fieldNumber) {
            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
          }
          if (meta.readByte() != Lucene54DocValuesFormat.SORTED) {
            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
          }
          readSortedField(info, meta);
        } else {
          // readSortedSetEntry already rejected unknown formats
          throw new AssertionError();
        }
      } else if (type == Lucene54DocValuesFormat.SORTED_NUMERIC) {
        SortedSetEntry ss = readSortedSetEntry(meta);
        sortedNumerics.put(info.name, ss);
        if (ss.format == SORTED_WITH_ADDRESSES) {
          // multi-valued: value stream + ord-index (per-document start offsets)
          if (meta.readVInt() != fieldNumber) {
            throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
          }
          if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
            throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
          }
          numerics.put(info.name, readNumericEntry(info, meta));
          if (meta.readVInt() != fieldNumber) {
            throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
          }
          if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
            throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
          }
          NumericEntry ordIndex = readNumericEntry(info, meta);
          ordIndexes.put(info.name, ordIndex);
        } else if (ss.format == SORTED_SET_TABLE) {
          // NOTE: this branch compares against info.number where the siblings use
          // fieldNumber — equivalent here, since info was looked up by fieldNumber
          if (meta.readVInt() != info.number) {
            throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
          }
          if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
            throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
          }
          NumericEntry n = readNumericEntry(info, meta);
          ords.put(info.name, n);
        } else if (ss.format == SORTED_SINGLE_VALUED) {
          // single-valued: just one numeric stream
          if (meta.readVInt() != fieldNumber) {
            throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
          }
          if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
            throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
          }
          numerics.put(info.name, readNumericEntry(info, meta));
        } else {
          throw new AssertionError();
        }
      } else {
        throw new CorruptIndexException("invalid type: " + type, meta);
      }
      fieldNumber = meta.readVInt();
    }
    return numFields;
  }

  /**
   * Reads one numeric entry (format id, missing-bits offset, data offset/count and
   * format-specific fields) from the metadata stream.
   */
  private NumericEntry readNumericEntry(FieldInfo info, IndexInput meta) throws IOException {
    NumericEntry entry = new NumericEntry();
    entry.format = meta.readVInt();
    entry.missingOffset = meta.readLong();
    if (entry.format == SPARSE_COMPRESSED) {
      // sparse bits need a bit more metadata
      entry.numDocsWithValue = meta.readVLong();
      final int blockShift = meta.readVInt();
      entry.monotonicMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithValue, blockShift);
      ramBytesUsed.addAndGet(entry.monotonicMeta.ramBytesUsed());
      directAddressesMeta.put(info.name, entry.monotonicMeta);
    }
    entry.offset = meta.readLong();
    entry.count = meta.readVLong();
    switch(entry.format) {
      case CONST_COMPRESSED:
        entry.minValue = meta.readLong();
        if (entry.count > Integer.MAX_VALUE) {
          // currently just a limitation e.g. of bits interface and so on.
throw new CorruptIndexException("illegal CONST_COMPRESSED count: " + entry.count, meta);
        }
        break;
      case GCD_COMPRESSED:
        // values are stored as (value - minValue) / gcd, bit-packed
        entry.minValue = meta.readLong();
        entry.gcd = meta.readLong();
        entry.bitsPerValue = meta.readVInt();
        break;
      case TABLE_COMPRESSED:
        // small dictionary of distinct values; per-doc bit-packed table ordinals
        final int uniqueValues = meta.readVInt();
        if (uniqueValues > 256) {
          throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, got=" + uniqueValues, meta);
        }
        entry.table = new long[uniqueValues];
        for (int i = 0; i < uniqueValues; ++i) {
          entry.table[i] = meta.readLong();
        }
        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.table));
        entry.bitsPerValue = meta.readVInt();
        break;
      case DELTA_COMPRESSED:
        // values stored as bit-packed deltas from minValue
        entry.minValue = meta.readLong();
        entry.bitsPerValue = meta.readVInt();
        break;
      case MONOTONIC_COMPRESSED:
        final int blockShift = meta.readVInt();
        entry.monotonicMeta = DirectMonotonicReader.loadMeta(meta, maxDoc + 1, blockShift);
        ramBytesUsed.addAndGet(entry.monotonicMeta.ramBytesUsed());
        directAddressesMeta.put(info.name, entry.monotonicMeta);
        break;
      case SPARSE_COMPRESSED:
        final byte numberType = meta.readByte();
        switch (numberType) {
          case 0:
            entry.numberType = NumberType.VALUE;
            break;
          case 1:
            entry.numberType = NumberType.ORDINAL;
            break;
          default:
            throw new CorruptIndexException("Number type can only be 0 or 1, got=" + numberType, meta);
        }

        // now read the numeric entry for non-missing values
        final int fieldNumber = meta.readVInt();
        if (fieldNumber != info.number) {
          throw new CorruptIndexException("Field numbers mistmatch: " + fieldNumber + " != " + info.number, meta);
        }
        final int dvFormat = meta.readByte();
        if (dvFormat != NUMERIC) {
          throw new CorruptIndexException("Formats mistmatch: " + dvFormat + " != " + NUMERIC, meta);
        }
        // recursive: the nested entry holds the actual values for docs that have one
        entry.nonMissingValues = readNumericEntry(info, meta);
        break;
      default:
        throw new CorruptIndexException("Unknown format: " + entry.format + ", input=", meta);
    }
    entry.endOffset = meta.readLong();
    return entry;
  }

  /**
   * Reads one binary entry (lengths, count, data offset and format-specific
   * addressing metadata) from the metadata stream.
   */
  private BinaryEntry readBinaryEntry(FieldInfo info, IndexInput meta) throws IOException {
    BinaryEntry entry = new BinaryEntry();
    entry.format = meta.readVInt();
    entry.missingOffset = meta.readLong();
    entry.minLength = meta.readVInt();
    entry.maxLength = meta.readVInt();
    entry.count = meta.readVLong();
    entry.offset = meta.readLong();
    switch(entry.format) {
      case BINARY_FIXED_UNCOMPRESSED:
        // fixed-length values: no extra addressing needed
        break;
      case BINARY_PREFIX_COMPRESSED:
        entry.addressesOffset = meta.readLong();
        entry.packedIntsVersion = meta.readVInt();
        entry.blockSize = meta.readVInt();
        entry.reverseIndexOffset = meta.readLong();
        break;
      case BINARY_VARIABLE_UNCOMPRESSED:
        entry.addressesOffset = meta.readLong();
        final int blockShift = meta.readVInt();
        // count + 1 addresses: one extra so value i spans [addr(i), addr(i+1))
        entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, entry.count + 1, blockShift);
        ramBytesUsed.addAndGet(entry.addressesMeta.ramBytesUsed());
        directAddressesMeta.put(info.name, entry.addressesMeta);
        entry.addressesEndOffset = meta.readLong();
        break;
      default:
        throw new CorruptIndexException("Unknown format: " + entry.format, meta);
    }
    return entry;
  }

  /**
   * Reads one sortedset entry; for SORTED_SET_TABLE formats this also loads the
   * ord dictionary and the per-set offset table into memory.
   */
  SortedSetEntry readSortedSetEntry(IndexInput meta) throws IOException {
    SortedSetEntry entry = new SortedSetEntry();
    entry.format = meta.readVInt();
    if (entry.format == SORTED_SET_TABLE) {
      final int totalTableLength = meta.readInt();
      if (totalTableLength > 256) {
        throw new CorruptIndexException("SORTED_SET_TABLE cannot have more than 256 values in its dictionary, got=" + totalTableLength, meta);
      }
      entry.table = new long[totalTableLength];
      for (int i = 0; i < totalTableLength; ++i) {
        entry.table[i] = meta.readLong();
      }
      ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.table));
      final int tableSize = meta.readInt();
      if (tableSize > totalTableLength + 1) { // +1 because of the empty set
        throw new CorruptIndexException("SORTED_SET_TABLE cannot have more set ids than ords in its dictionary, got " + totalTableLength + " ords and " + tableSize + " sets", meta);
      }
      // tableOffsets[i]..tableOffsets[i+1] delimits set i within table;
      // offsets are stored as deltas, accumulated here
      entry.tableOffsets = new int[tableSize + 1];
      for (int i = 1; i < entry.tableOffsets.length; ++i) {
entry.tableOffsets[i] = entry.tableOffsets[i - 1] + meta.readInt();
      }
      ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.tableOffsets));
    } else if (entry.format != SORTED_SINGLE_VALUED && entry.format != SORTED_WITH_ADDRESSES) {
      throw new CorruptIndexException("Unknown format: " + entry.format, meta);
    }
    return entry;
  }

  @Override
  public NumericDocValues getNumeric(FieldInfo field) throws IOException {
    NumericEntry entry = numerics.get(field.name);
    Bits docsWithField;

    if (entry.format == SPARSE_COMPRESSED) {
      return getSparseNumericDocValues(entry);
    } else {
      if (entry.missingOffset == ALL_MISSING) {
        // no documents have a value
        return DocValues.emptyNumeric();
      } else if (entry.missingOffset == ALL_LIVE) {
        // every document has a value: iterate all docIDs without a missing-bits check
        LongValues values = getNumeric(entry);
        return new NumericDocValues() {
          private int docID = -1;

          @Override
          public int docID() {
            return docID;
          }

          @Override
          public int nextDoc() {
            docID++;
            if (docID == maxDoc) {
              docID = NO_MORE_DOCS;
            }
            return docID;
          }

          @Override
          public int advance(int target) {
            if (target >= maxDoc) {
              docID = NO_MORE_DOCS;
            } else {
              docID = target;
            }
            return docID;
          }

          @Override
          public boolean advanceExact(int target) throws IOException {
            docID = target;
            return true;
          }

          @Override
          public long cost() {
            // TODO
            return 0;
          }

          @Override
          public long longValue() {
            return values.get(docID);
          }
        };
      } else {
        docsWithField = getLiveBits(entry.missingOffset, maxDoc);
      }
    }
    // general case: skip docs whose value is 0 unless the missing-bits say they have one
    final LongValues values = getNumeric(entry);
    return new NumericDocValues() {

      int doc = -1;
      long value;

      @Override
      public long longValue() throws IOException {
        return value;
      }

      @Override
      public int docID() {
        return doc;
      }

      @Override
      public int nextDoc() throws IOException {
        return advance(doc + 1);
      }

      @Override
      public int advance(int target) throws IOException {
        for (int doc = target; doc < maxDoc; ++doc) {
          value = values.get(doc);
          // value != 0 short-circuits the bits lookup: 0 is the only ambiguous value
          if (value != 0 || docsWithField.get(doc)) {
            return this.doc = doc;
          }
        }
        return doc = NO_MORE_DOCS;
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        doc = target;
        value = values.get(doc);
        return value != 0 || docsWithField.get(doc);
      }

      @Override
      public long cost() {
        return maxDoc;
      }

    };
  }

  @Override
  public long ramBytesUsed() {
    return ramBytesUsed.get();
  }

  @Override
  public synchronized Collection<Accountable> getChildResources() {
    List<Accountable> resources = new ArrayList<>();
    resources.addAll(Accountables.namedAccountables("addresses field", addressInstances));
    resources.addAll(Accountables.namedAccountables("reverse index field", reverseIndexInstances));
    resources.addAll(Accountables.namedAccountables("direct addresses meta field", directAddressesMeta));
    return Collections.unmodifiableList(resources);
  }

  @Override
  public void checkIntegrity() throws IOException {
    // full checksum verification of the data file (expensive; not done on open)
    CodecUtil.checksumEntireFile(data);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(fields=" + numFields + ")";
  }

  /**
   * Returns a random-access view over a numeric entry's values, decoding
   * according to the entry's compression format.
   */
  LongValues getNumeric(NumericEntry entry) throws IOException {
    switch (entry.format) {
      case CONST_COMPRESSED: {
        // single constant for all live docs; 0 for docs without a value
        final long constant = entry.minValue;
        final Bits live = getLiveBits(entry.missingOffset, (int)entry.count);
        return new LongValues() {
          @Override
          public long get(long index) {
            return live.get((int)index) ? constant : 0;
          }
        };
      }
      case DELTA_COMPRESSED: {
        RandomAccessInput slice = this.data.randomAccessSlice(entry.offset, entry.endOffset - entry.offset);
        final long delta = entry.minValue;
        final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue, 0);
        return new LongValues() {
          @Override
          public long get(long id) {
            return delta + values.get(id);
          }
        };
      }
      case GCD_COMPRESSED: {
        RandomAccessInput slice = this.data.randomAccessSlice(entry.offset, entry.endOffset - entry.offset);
        final long min = entry.minValue;
        final long mult = entry.gcd;
        final LongValues quotientReader = DirectReader.getInstance(slice, entry.bitsPerValue, 0);
        return new LongValues() {
          @Override
          public long get(long id) {
            return min + mult * quotientReader.get(id);
          }
        };
      }
      case TABLE_COMPRESSED: {
        // per-doc ordinal into the small value dictionary read at open time
        RandomAccessInput slice = this.data.randomAccessSlice(entry.offset, entry.endOffset - entry.offset);
        final long table[] = entry.table;
        final LongValues ords = DirectReader.getInstance(slice, entry.bitsPerValue, 0);
        return new LongValues() {
          @Override
          public long get(long id) {
            return table[(int) ords.get(id)];
          }
        };
      }
      case SPARSE_COMPRESSED:
        final SparseNumericDocValues values = getSparseNumericDocValues(entry);
        final long missingValue;
        switch (entry.numberType) {
          case ORDINAL:
            missingValue = -1L;  // missing ordinal
            break;
          case VALUE:
            missingValue = 0L;   // missing plain value
            break;
          default:
            throw new AssertionError();
        }
        return new SparseNumericDocValuesRandomAccessWrapper(values, missingValue);
      default:
        throw new AssertionError();
    }
  }

  /**
   * Iterator over a sparse numeric field: docIds holds the (sorted) docIDs that
   * have a value, values holds the matching values at the same index.
   */
  static final class SparseNumericDocValues extends NumericDocValues {

    final int docIDsLength;
    final LongValues docIds, values;
    // index: position in docIds/values; doc: current docID
    int index, doc;

    SparseNumericDocValues(int docIDsLength, LongValues docIDs, LongValues values) {
      this.docIDsLength = docIDsLength;
      this.docIds = docIDs;
      this.values = values;
      reset();
    }

    void reset() {
      index = -1;
      doc = -1;
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      if (index >= docIDsLength - 1) {
        index = docIDsLength;
        return doc =
NO_MORE_DOCS;
      }
      return doc = (int) docIds.get(++index);
    }

    @Override
    public int advance(int target) throws IOException {
      long loIndex = index;
      long step = 1;
      long hiIndex;
      int hiDoc;

      // gallop forward by exponentially growing the interval
      // in order to find an interval so that the target doc
      // is in ]lo, hi]. Compared to a regular binary search,
      // this optimizes the case that the caller performs many
      // advance calls by small deltas
      do {
        hiIndex = index + step;
        if (hiIndex >= docIDsLength) {
          hiIndex = docIDsLength;
          hiDoc = NO_MORE_DOCS;
          break;
        }
        hiDoc = (int) docIds.get(hiIndex);
        if (hiDoc >= target) {
          break;
        }
        step <<= 1;
      } while (true);

      // now binary search
      while (loIndex + 1 < hiIndex) {
        final long midIndex = (loIndex + 1 + hiIndex) >>> 1;
        final int midDoc = (int) docIds.get(midIndex);
        if (midDoc >= target) {
          hiIndex = midIndex;
          hiDoc = midDoc;
        } else {
          loIndex = midIndex;
        }
      }

      index = (int) hiIndex;
      return doc = hiDoc;
    }

    @Override
    public boolean advanceExact(int target) throws IOException {
      if (advance(target) == target) {
        return true;
      }
      // advance overshot: step back so index points at the last entry <= target,
      // and report target as the current doc (which has no value)
      --index;
      doc = target;
      return index >= 0 && docIds.get(index) == target;
    }

    @Override
    public long longValue() {
      assert index >= 0;
      assert index < docIDsLength;
      return values.get(index);
    }

    @Override
    public long cost() {
      return docIDsLength;
    }
  }

  /**
   * Adapts a SparseNumericDocValues iterator to the random-access LongValues
   * interface, returning missingValue for absent docs. NOTE: stateful — a
   * backwards lookup resets and re-advances the underlying iterator.
   */
  static class SparseNumericDocValuesRandomAccessWrapper extends LongValues {

    final SparseNumericDocValues values;
    final long missingValue;

    SparseNumericDocValuesRandomAccessWrapper(SparseNumericDocValues values, long missingValue) {
      this.values = values;
      this.missingValue = missingValue;
    }

    @Override
    public long get(long longIndex) {
      final int index = Math.toIntExact(longIndex);
      int doc = values.docID();
      if (doc >= index) {
        // can only advance forward: rewind for backwards access
        values.reset();
      }
      assert values.docID() < index;
      try {
        doc = values.advance(index);
      } catch (IOException e) {
        // LongValues.get cannot throw; wrap the slice's IOException
        throw new RuntimeException(e);
      }
      if (doc == index) {
        return values.longValue();
      } else {
        return missingValue;
      }
    }

  }

  /** Returns the legacy (random-access) binary view for the given field. */
  LegacyBinaryDocValues getLegacyBinary(FieldInfo field) throws IOException {
    BinaryEntry bytes = binaries.get(field.name);
    switch(bytes.format) {
      case BINARY_FIXED_UNCOMPRESSED:
        return getFixedBinary(field, bytes);
      case BINARY_VARIABLE_UNCOMPRESSED:
        return getVariableBinary(field, bytes);
      case BINARY_PREFIX_COMPRESSED:
        return getCompressedBinary(field, bytes);
      default:
        throw new AssertionError();
    }
  }

  @Override
  public BinaryDocValues getBinary(FieldInfo field) throws IOException {
    BinaryEntry be = binaries.get(field.name);
    return new LegacyBinaryDocValuesWrapper(getLiveBits(be.missingOffset, maxDoc), getLegacyBinary(field));
  }

  private LegacyBinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
    // all values have the same length, so value i starts at i * maxLength
    final IndexInput data = this.data.slice("fixed-binary", bytes.offset, bytes.count * bytes.maxLength);

    final BytesRef term = new BytesRef(bytes.maxLength);
    final byte[] buffer = term.bytes;
    final int length = term.length = bytes.maxLength;

    return new LongBinaryDocValues() {
      @Override
      public BytesRef get(long id) {
        try {
          data.seek(id * length);
          data.readBytes(buffer, 0, buffer.length);
          // NOTE: the returned BytesRef is reused across calls
          return term;
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    };
  }

  private LegacyBinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
    // monotonic addresses map value id -> start offset within the data slice
    final RandomAccessInput addressesData = this.data.randomAccessSlice(bytes.addressesOffset, bytes.addressesEndOffset - bytes.addressesOffset);
    final LongValues addresses = DirectMonotonicReader.getInstance(bytes.addressesMeta, addressesData);

    final IndexInput data = this.data.slice("var-binary", bytes.offset, bytes.addressesOffset - bytes.offset);
    final BytesRef term = new BytesRef(Math.max(0, bytes.maxLength));
    final byte buffer[] = term.bytes;

    return new LongBinaryDocValues() {
      @Override
      public BytesRef get(long id) {
        long startAddress = addresses.get(id);
        long endAddress = addresses.get(id+1);
        int length = (int) (endAddress - startAddress);
        try {
          data.seek(startAddress);
          data.readBytes(buffer, 0, length);
          term.length = length;
          // NOTE: the returned BytesRef is reused across calls
          return term;
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    };
  }

  /** returns an address instance for prefix-compressed binary values. */
  private synchronized MonotonicBlockPackedReader getIntervalInstance(FieldInfo field, BinaryEntry bytes) throws IOException {
    MonotonicBlockPackedReader addresses = addressInstances.get(field.name);
    if (addresses == null) {
      data.seek(bytes.addressesOffset);
      // one address per interval of terms (rounded up)
      final long size = (bytes.count + INTERVAL_MASK) >>> INTERVAL_SHIFT;
      addresses = MonotonicBlockPackedReader.of(data, bytes.packedIntsVersion, bytes.blockSize, size, false);
      if (!merging) {
        // cache only on the original producer, never on merge clones
        addressInstances.put(field.name, addresses);
        ramBytesUsed.addAndGet(addresses.ramBytesUsed() + Integer.BYTES);
      }
    }
    return addresses;
  }

  /** returns a reverse lookup instance for prefix-compressed binary values. */
  private synchronized ReverseTermsIndex getReverseIndexInstance(FieldInfo field, BinaryEntry bytes) throws IOException {
    ReverseTermsIndex index = reverseIndexInstances.get(field.name);
    if (index == null) {
      index = new ReverseTermsIndex();
      data.seek(bytes.reverseIndexOffset);
      long size = (bytes.count + REVERSE_INTERVAL_MASK) >>> REVERSE_INTERVAL_SHIFT;
      index.termAddresses = MonotonicBlockPackedReader.of(data, bytes.packedIntsVersion, bytes.blockSize, size, false);
      long dataSize = data.readVLong();
      PagedBytes pagedBytes = new PagedBytes(15);
      pagedBytes.copy(data, dataSize);
      index.terms = pagedBytes.freeze(true);
      if (!merging) {
        reverseIndexInstances.put(field.name, index);
        ramBytesUsed.addAndGet(index.ramBytesUsed());
      }
    }
    return index;
  }

  private LegacyBinaryDocValues getCompressedBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
    final MonotonicBlockPackedReader addresses = getIntervalInstance(field, bytes);
    final ReverseTermsIndex index = getReverseIndexInstance(field, bytes);
    assert addresses.size() > 0; // we don't have to handle empty case
    IndexInput slice = data.slice("terms", bytes.offset,
bytes.addressesOffset - bytes.offset);
    return new CompressedBinaryDocValues(bytes, addresses, index, slice);
  }

  @Override
  public SortedDocValues getSorted(FieldInfo field) throws IOException {
    final int valueCount = (int) binaries.get(field.name).count;
    final LegacyBinaryDocValues binary = getLegacyBinary(field);
    NumericEntry entry = ords.get(field.name);
    final LongValues ordinals = getNumeric(entry);
    if (entry.format == SPARSE_COMPRESSED) {
      // delegate iteration to the sparse ordinal stream; docs without an ord are skipped
      final SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) ordinals).values;
      return new SortedDocValues() {

        @Override
        public int ordValue() {
          return (int) sparseValues.longValue();
        }

        @Override
        public BytesRef lookupOrd(int ord) {
          return binary.get(ord);
        }

        @Override
        public int getValueCount() {
          return valueCount;
        }

        @Override
        public int docID() {
          return sparseValues.docID();
        }

        @Override
        public int nextDoc() throws IOException {
          return sparseValues.nextDoc();
        }

        @Override
        public int advance(int target) throws IOException {
          return sparseValues.advance(target);
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
          return sparseValues.advanceExact(target);
        }

        @Override
        public long cost() {
          return sparseValues.cost();
        }

      };
    }
    // dense case: ord -1 marks docs without a value
    return new SortedDocValues() {
      private int docID = -1;
      private int ord;

      @Override
      public int docID() {
        return docID;
      }

      @Override
      public int nextDoc() throws IOException {
        assert docID != NO_MORE_DOCS;
        while (true) {
          docID++;
          if (docID == maxDoc) {
            docID = NO_MORE_DOCS;
            break;
          }
          ord = (int) ordinals.get(docID);
          if (ord != -1) {
            break;
          }
        }
        return docID;
      }

      @Override
      public int advance(int target) throws IOException {
        if (target >= maxDoc) {
          docID = NO_MORE_DOCS;
          return docID;
        } else {
          // position just before target, then scan forward
          docID = target-1;
          return nextDoc();
        }
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        docID = target;
        ord = (int) ordinals.get(target);
        return ord != -1;
      }

      @Override
      public int ordValue() {
        return ord;
      }

      @Override
      public long cost() {
        // TODO
        return 0;
      }

      @Override
      public BytesRef lookupOrd(int ord) {
        return binary.get(ord);
      }

      @Override
      public int getValueCount() {
        return valueCount;
      }

      @Override
      public int lookupTerm(BytesRef key) throws IOException {
        // prefix-compressed dictionaries support direct term lookup
        if (binary instanceof CompressedBinaryDocValues) {
          return (int) ((CompressedBinaryDocValues)binary).lookupTerm(key);
        } else {
          return super.lookupTerm(key);
        }
      }

      @Override
      public TermsEnum termsEnum() throws IOException {
        if (binary instanceof CompressedBinaryDocValues) {
          return ((CompressedBinaryDocValues)binary).getTermsEnum();
        } else {
          return super.termsEnum();
        }
      }
    };
  }

  /** returns an address instance for sortedset ordinal lists */
  private LongValues getOrdIndexInstance(FieldInfo field, NumericEntry entry) throws IOException {
    RandomAccessInput data = this.data.randomAccessSlice(entry.offset, entry.endOffset - entry.offset);
    return DirectMonotonicReader.getInstance(entry.monotonicMeta, data);
  }

  @Override
  public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
    SortedSetEntry ss = sortedNumerics.get(field.name);
    if (ss.format == SORTED_SINGLE_VALUED) {
      // exactly one value per doc that has one: back it by a plain numeric stream
      NumericEntry numericEntry = numerics.get(field.name);
      final LongValues values = getNumeric(numericEntry);
      if (numericEntry.format == SPARSE_COMPRESSED) {
        SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) values).values;
        return new SortedNumericDocValues() {

          @Override
          public long nextValue() throws IOException {
            return sparseValues.longValue();
          }

          @Override
          public int docValueCount() {
            return 1;
          }

          @Override
          public int docID() {
            return sparseValues.docID();
          }

          @Override
          public int nextDoc() throws IOException {
            return sparseValues.nextDoc();
          }

          @Override
          public int advance(int target) throws IOException {
            return sparseValues.advance(target);
          }

          @Override
          public boolean advanceExact(int target) throws IOException {
            return sparseValues.advanceExact(target);
          }

          @Override
          public long cost() {
            return sparseValues.cost();
          }
        };
      }
      final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
      return new SortedNumericDocValues() {
        int docID = -1;

        @Override
        public int docID() {
          return docID;
        }

        @Override
        public int nextDoc() {
          while (true) {
            docID++;
            if (docID == maxDoc) {
              docID = NO_MORE_DOCS;
              break;
            }

            if (docsWithField.get(docID)) {
              // TODO: use .nextSetBit here, at least!!
              break;
            }
          }
          return docID;
        }

        @Override
        public int advance(int target) {
          if (target >= maxDoc) {
            docID = NO_MORE_DOCS;
            return docID;
          } else {
            docID = target-1;
            return nextDoc();
          }
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
          docID = target;
          return docsWithField.get(docID);
        }

        @Override
        public long cost() {
          // TODO
          return 0;
        }

        @Override
        public int docValueCount() {
          return 1;
        }

        @Override
        public long nextValue() {
          return values.get(docID);
        }
      };
    } else if (ss.format == SORTED_WITH_ADDRESSES) {
      // multi-valued: ordIndex gives [startOffset, endOffset) into the value stream
      NumericEntry numericEntry = numerics.get(field.name);
      final LongValues values = getNumeric(numericEntry);
      final LongValues ordIndex = getOrdIndexInstance(field, ordIndexes.get(field.name));

      return new SortedNumericDocValues() {
        long startOffset;
        long endOffset;
        int docID = -1;
        long upto;

        @Override
        public int docID() {
          return docID;
        }

        @Override
        public int nextDoc() {
          while (true) {
            docID++;
            if (docID == maxDoc) {
              docID = NO_MORE_DOCS;
              return docID;
            }
            startOffset = ordIndex.get(docID);
            endOffset = ordIndex.get(docID+1L);
            // empty range means this doc has no values: keep scanning
            if (endOffset > startOffset) {
              break;
            }
          }
          upto = startOffset;
          return docID;
        }

        @Override
        public int advance(int target) {
          if (target >= maxDoc) {
            docID = NO_MORE_DOCS;
            return docID;
          } else {
            docID = target-1;
            return nextDoc();
          }
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
          docID = target;
          startOffset = ordIndex.get(docID);
          endOffset = ordIndex.get(docID+1L);
          return endOffset > startOffset;
        }

        @Override
        public long cost() {
          // TODO
          return 0;
        }

        @Override
        public int docValueCount() {
          return (int) (endOffset - startOffset);
        }

        @Override
        public long nextValue() {
          return values.get(upto++);
        }
}; } else if (ss.format == SORTED_SET_TABLE) { NumericEntry entry = ords.get(field.name); final LongValues ordinals = getNumeric(entry); final long[] table = ss.table; final int[] offsets = ss.tableOffsets; return new SortedNumericDocValues() { int startOffset; int endOffset; int docID = -1; int upto; @Override public int docID() { return docID; } @Override public int nextDoc() { while (true) { docID++; if (docID == maxDoc) { docID = NO_MORE_DOCS; return docID; } int ord = (int) ordinals.get(docID); startOffset = offsets[ord]; endOffset = offsets[ord+1]; if (endOffset > startOffset) { break; } } upto = startOffset; return docID; } @Override public int advance(int target) { if (target >= maxDoc) { docID = NO_MORE_DOCS; return docID; } else { docID = target-1; return nextDoc(); } } @Override public boolean advanceExact(int target) throws IOException { docID = target; int ord = (int) ordinals.get(docID); startOffset = offsets[ord]; endOffset = offsets[ord+1]; return endOffset > startOffset; } @Override public long cost() { // TODO return 0; } @Override public int docValueCount() { return endOffset - startOffset; } @Override public long nextValue() { return table[upto++]; } }; } else { throw new AssertionError(); } } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { SortedSetEntry ss = sortedSets.get(field.name); switch (ss.format) { case SORTED_SINGLE_VALUED: return DocValues.singleton(getSorted(field)); case SORTED_WITH_ADDRESSES: return getSortedSetWithAddresses(field); case SORTED_SET_TABLE: return getSortedSetTable(field, ss); default: throw new AssertionError(); } } private SortedSetDocValues getSortedSetWithAddresses(FieldInfo field) throws IOException { final long valueCount = binaries.get(field.name).count; // we keep the byte[]s and list of ords on disk, these could be large final LongBinaryDocValues binary = (LongBinaryDocValues) getLegacyBinary(field); final LongValues ordinals = getNumeric(ords.get(field.name)); // but 
the addresses to the ord stream are in RAM final LongValues ordIndex = getOrdIndexInstance(field, ordIndexes.get(field.name)); return new LegacySortedSetDocValuesWrapper(new LegacySortedSetDocValues() { long startOffset; long offset; long endOffset; @Override public long nextOrd() { if (offset == endOffset) { return NO_MORE_ORDS; } else { long ord = ordinals.get(offset); offset++; return ord; } } @Override public void setDocument(int docID) { startOffset = offset = ordIndex.get(docID); endOffset = ordIndex.get(docID+1L); } @Override public BytesRef lookupOrd(long ord) { return binary.get(ord); } @Override public long getValueCount() { return valueCount; } @Override public long lookupTerm(BytesRef key) { if (binary instanceof CompressedBinaryDocValues) { return ((CompressedBinaryDocValues)binary).lookupTerm(key); } else { return super.lookupTerm(key); } } @Override public TermsEnum termsEnum() throws IOException { if (binary instanceof CompressedBinaryDocValues) { return ((CompressedBinaryDocValues)binary).getTermsEnum(); } else { return super.termsEnum(); } } }, maxDoc); } private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException { final long valueCount = binaries.get(field.name).count; final LongBinaryDocValues binary = (LongBinaryDocValues) getLegacyBinary(field); final NumericEntry ordinalsEntry = ords.get(field.name); final LongValues ordinals = getNumeric(ordinalsEntry); final long[] table = ss.table; final int[] offsets = ss.tableOffsets; return new LegacySortedSetDocValuesWrapper(new LegacySortedSetDocValues() { int offset, startOffset, endOffset; @Override public void setDocument(int docID) { final int ord = (int) ordinals.get(docID); offset = startOffset = offsets[ord]; endOffset = offsets[ord + 1]; } @Override public long nextOrd() { if (offset == endOffset) { return NO_MORE_ORDS; } else { return table[offset++]; } } @Override public BytesRef lookupOrd(long ord) { return binary.get(ord); } @Override public long 
getValueCount() { return valueCount; } @Override public long lookupTerm(BytesRef key) { if (binary instanceof CompressedBinaryDocValues) { return ((CompressedBinaryDocValues) binary).lookupTerm(key); } else { return super.lookupTerm(key); } } @Override public TermsEnum termsEnum() throws IOException { if (binary instanceof CompressedBinaryDocValues) { return ((CompressedBinaryDocValues) binary).getTermsEnum(); } else { return super.termsEnum(); } } }, maxDoc); } private Bits getLiveBits(final long offset, final int count) throws IOException { if (offset == ALL_MISSING) { return new Bits.MatchNoBits(count); } else if (offset == ALL_LIVE) { return new Bits.MatchAllBits(count); } else { int length = (int) ((count + 7L) >>> 3); final RandomAccessInput in = data.randomAccessSlice(offset, length); return new Bits() { @Override public boolean get(int index) { try { return (in.readByte(index >> 3) & (1 << (index & 7))) != 0; } catch (IOException e) { throw new RuntimeException(e); } } @Override public int length() { return count; } }; } } private SparseNumericDocValues getSparseNumericDocValues(NumericEntry entry) throws IOException { final RandomAccessInput docIdsData = this.data.randomAccessSlice(entry.missingOffset, entry.offset - entry.missingOffset); final LongValues docIDs = DirectMonotonicReader.getInstance(entry.monotonicMeta, docIdsData); final LongValues values = getNumeric(entry.nonMissingValues); // cannot be sparse return new SparseNumericDocValues(Math.toIntExact(entry.numDocsWithValue), docIDs, values); } @Override public synchronized DocValuesProducer getMergeInstance() throws IOException { return new Lucene54DocValuesProducer(this); } @Override public void close() throws IOException { data.close(); } /** metadata entry for a numeric docvalues field */ static class NumericEntry { private NumericEntry() {} /** offset to the bitset representing docsWithField, or -1 if no documents have missing values */ long missingOffset; /** offset to the actual numeric 
values */ public long offset; /** end offset to the actual numeric values */ public long endOffset; /** bits per value used to pack the numeric values */ public int bitsPerValue; int format; /** count of values written */ public long count; /** monotonic meta */ public DirectMonotonicReader.Meta monotonicMeta; long minValue; long gcd; long table[]; /** for sparse compression */ long numDocsWithValue; NumericEntry nonMissingValues; NumberType numberType; } /** metadata entry for a binary docvalues field */ static class BinaryEntry { private BinaryEntry() {} /** offset to the bitset representing docsWithField, or -1 if no documents have missing values */ long missingOffset; /** offset to the actual binary values */ long offset; int format; /** count of values written */ public long count; int minLength; int maxLength; /** offset to the addressing data that maps a value to its slice of the byte[] */ public long addressesOffset, addressesEndOffset; /** meta data for addresses */ public DirectMonotonicReader.Meta addressesMeta; /** offset to the reverse index */ public long reverseIndexOffset; /** packed ints version used to encode addressing information */ public int packedIntsVersion; /** packed ints blocksize */ public int blockSize; } /** metadata entry for a sorted-set docvalues field */ static class SortedSetEntry { private SortedSetEntry() {} int format; long[] table; int[] tableOffsets; } // internally we compose complex dv (sorted/sortedset) from other ones static abstract class LongBinaryDocValues extends LegacyBinaryDocValues { @Override public final BytesRef get(int docID) { return get((long)docID); } abstract BytesRef get(long id); } // used for reverse lookup to a small range of blocks static class ReverseTermsIndex implements Accountable { public MonotonicBlockPackedReader termAddresses; public PagedBytes.Reader terms; @Override public long ramBytesUsed() { return termAddresses.ramBytesUsed() + terms.ramBytesUsed(); } @Override public 
Collection<Accountable> getChildResources() { List<Accountable> resources = new ArrayList<>(); resources.add(Accountables.namedAccountable("term bytes", terms)); resources.add(Accountables.namedAccountable("term addresses", termAddresses)); return Collections.unmodifiableList(resources); } @Override public String toString() { return getClass().getSimpleName() + "(size=" + termAddresses.size() + ")"; } } //in the compressed case, we add a few additional operations for //more efficient reverse lookup and enumeration static final class CompressedBinaryDocValues extends LongBinaryDocValues { final long numValues; final long numIndexValues; final int maxTermLength; final MonotonicBlockPackedReader addresses; final IndexInput data; final CompressedBinaryTermsEnum termsEnum; final PagedBytes.Reader reverseTerms; final MonotonicBlockPackedReader reverseAddresses; final long numReverseIndexValues; public CompressedBinaryDocValues(BinaryEntry bytes, MonotonicBlockPackedReader addresses, ReverseTermsIndex index, IndexInput data) throws IOException { this.maxTermLength = bytes.maxLength; this.numValues = bytes.count; this.addresses = addresses; this.numIndexValues = addresses.size(); this.data = data; this.reverseTerms = index.terms; this.reverseAddresses = index.termAddresses; this.numReverseIndexValues = reverseAddresses.size(); this.termsEnum = getTermsEnum(data); } @Override public BytesRef get(long id) { try { termsEnum.seekExact(id); return termsEnum.term(); } catch (IOException e) { throw new RuntimeException(e); } } long lookupTerm(BytesRef key) { try { switch (termsEnum.seekCeil(key)) { case FOUND: return termsEnum.ord(); case NOT_FOUND: return -termsEnum.ord()-1; default: return -numValues-1; } } catch (IOException bogus) { throw new RuntimeException(bogus); } } TermsEnum getTermsEnum() throws IOException { return getTermsEnum(data.clone()); } private CompressedBinaryTermsEnum getTermsEnum(IndexInput input) throws IOException { return new 
CompressedBinaryTermsEnum(input); } class CompressedBinaryTermsEnum extends TermsEnum { private long currentOrd = -1; // offset to the start of the current block private long currentBlockStart; private final IndexInput input; // delta from currentBlockStart to start of each term private final int offsets[] = new int[INTERVAL_COUNT]; private final byte buffer[] = new byte[2*INTERVAL_COUNT-1]; private final BytesRef term = new BytesRef(maxTermLength); private final BytesRef firstTerm = new BytesRef(maxTermLength); private final BytesRef scratch = new BytesRef(); CompressedBinaryTermsEnum(IndexInput input) throws IOException { this.input = input; input.seek(0); } private void readHeader() throws IOException { firstTerm.length = input.readVInt(); input.readBytes(firstTerm.bytes, 0, firstTerm.length); input.readBytes(buffer, 0, INTERVAL_COUNT-1); if (buffer[0] == -1) { readShortAddresses(); } else { readByteAddresses(); } currentBlockStart = input.getFilePointer(); } // read single byte addresses: each is delta - 2 // (shared prefix byte and length > 0 are both implicit) private void readByteAddresses() throws IOException { int addr = 0; for (int i = 1; i < offsets.length; i++) { addr += 2 + (buffer[i-1] & 0xFF); offsets[i] = addr; } } // read double byte addresses: each is delta - 2 // (shared prefix byte and length > 0 are both implicit) private void readShortAddresses() throws IOException { input.readBytes(buffer, INTERVAL_COUNT-1, INTERVAL_COUNT); int addr = 0; for (int i = 1; i < offsets.length; i++) { int x = i<<1; addr += 2 + ((buffer[x-1] << 8) | (buffer[x] & 0xFF)); offsets[i] = addr; } } // set term to the first term private void readFirstTerm() throws IOException { term.length = firstTerm.length; System.arraycopy(firstTerm.bytes, firstTerm.offset, term.bytes, 0, term.length); } // read term at offset, delta encoded from first term private void readTerm(int offset) throws IOException { int start = input.readByte() & 0xFF; System.arraycopy(firstTerm.bytes, 
firstTerm.offset, term.bytes, 0, start); int suffix = offsets[offset] - offsets[offset-1] - 1; input.readBytes(term.bytes, start, suffix); term.length = start + suffix; } @Override public BytesRef next() throws IOException { currentOrd++; if (currentOrd >= numValues) { return null; } else { int offset = (int) (currentOrd & INTERVAL_MASK); if (offset == 0) { // switch to next block readHeader(); readFirstTerm(); } else { readTerm(offset); } return term; } } // binary search reverse index to find smaller // range of blocks to search long binarySearchIndex(BytesRef text) throws IOException { long low = 0; long high = numReverseIndexValues - 1; while (low <= high) { long mid = (low + high) >>> 1; reverseTerms.fill(scratch, reverseAddresses.get(mid)); int cmp = scratch.compareTo(text); if (cmp < 0) { low = mid + 1; } else if (cmp > 0) { high = mid - 1; } else { return mid; } } return high; } // binary search against first term in block range // to find term's block long binarySearchBlock(BytesRef text, long low, long high) throws IOException { while (low <= high) { long mid = (low + high) >>> 1; input.seek(addresses.get(mid)); term.length = input.readVInt(); input.readBytes(term.bytes, 0, term.length); int cmp = term.compareTo(text); if (cmp < 0) { low = mid + 1; } else if (cmp > 0) { high = mid - 1; } else { return mid; } } return high; } @Override public SeekStatus seekCeil(BytesRef text) throws IOException { // locate block: narrow to block range with index, then search blocks final long block; long indexPos = binarySearchIndex(text); if (indexPos < 0) { block = 0; } else { long low = indexPos << BLOCK_INTERVAL_SHIFT; long high = Math.min(numIndexValues - 1, low + BLOCK_INTERVAL_MASK); block = Math.max(low, binarySearchBlock(text, low, high)); } // position before block, then scan to term. 
input.seek(addresses.get(block)); currentOrd = (block << INTERVAL_SHIFT) - 1; while (next() != null) { int cmp = term.compareTo(text); if (cmp == 0) { return SeekStatus.FOUND; } else if (cmp > 0) { return SeekStatus.NOT_FOUND; } } return SeekStatus.END; } @Override public void seekExact(long ord) throws IOException { long block = ord >>> INTERVAL_SHIFT; if (block != currentOrd >>> INTERVAL_SHIFT) { // switch to different block input.seek(addresses.get(block)); readHeader(); } currentOrd = ord; int offset = (int) (ord & INTERVAL_MASK); if (offset == 0) { readFirstTerm(); } else { input.seek(currentBlockStart + offsets[offset-1]); readTerm(offset); } } @Override public BytesRef term() throws IOException { return term; } @Override public long ord() throws IOException { return currentOrd; } @Override public int docFreq() throws IOException { throw new UnsupportedOperationException(); } @Override public long totalTermFreq() throws IOException { return -1; } @Override public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } } } }