MemoryDocValuesProducer.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.memory;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.*;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;

/**
 * Reader for {@link MemoryDocValuesFormat}
 */
class MemoryDocValuesProducer extends DocValuesProducer {
  // metadata maps (just file pointers and minimal stuff)
  private final Map<String,NumericEntry> numerics = new HashMap<>();
  private final Map<String,BinaryEntry> binaries = new HashMap<>();
  private final Map<String,FSTEntry> fsts = new HashMap<>();
  private final Map<String,SortedSetEntry> sortedSets = new HashMap<>();
  private final Map<String,SortedNumericEntry> sortedNumerics = new HashMap<>();
  private final IndexInput data;
  
  // ram instances we have already loaded
  private final Map<String,LegacyNumericDocValues> numericInstances = new HashMap<>();
  private final Map<String,BytesAndAddresses> pagedBytesInstances = new HashMap<>();
  private final Map<String,FST<Long>> fstInstances = new HashMap<>();
  private final Map<String,FixedBitSet> docsWithFieldInstances = new HashMap<>();
  private final Map<String,MonotonicBlockPackedReader> addresses = new HashMap<>();
  
  private final Map<String,Accountable> numericInfo = new HashMap<>();

  private final int numEntries;
  private final int maxDoc;
  private final AtomicLong ramBytesUsed;
  private final int version;
  
  private final boolean merging;
  
  static final byte NUMBER = 0;
  static final byte BYTES = 1;
  static final byte FST = 2;
  static final byte SORTED_SET = 4;
  static final byte SORTED_SET_SINGLETON = 5;
  static final byte SORTED_NUMERIC = 6;
  static final byte SORTED_NUMERIC_SINGLETON = 7;

  static final int BLOCK_SIZE = 4096;
  
  static final byte DELTA_COMPRESSED = 0;
  static final byte TABLE_COMPRESSED = 1;
  static final byte BLOCK_COMPRESSED = 2;
  static final byte GCD_COMPRESSED = 3;
  
  static final int VERSION_START = 4;
  static final int VERSION_CURRENT = VERSION_START;
  
  // clone for merge: when merging we don't do any instances.put()s
  MemoryDocValuesProducer(MemoryDocValuesProducer original) throws IOException {
    assert Thread.holdsLock(original);
    numerics.putAll(original.numerics);
    binaries.putAll(original.binaries);
    fsts.putAll(original.fsts);
    sortedSets.putAll(original.sortedSets);
    sortedNumerics.putAll(original.sortedNumerics);
    data = original.data.clone();
    
    numericInstances.putAll(original.numericInstances);
    pagedBytesInstances.putAll(original.pagedBytesInstances);
    fstInstances.putAll(original.fstInstances);
    docsWithFieldInstances.putAll(original.docsWithFieldInstances);
    addresses.putAll(original.addresses);
    
    numericInfo.putAll(original.numericInfo);
    
    numEntries = original.numEntries;
    maxDoc = original.maxDoc;
    ramBytesUsed = new AtomicLong(original.ramBytesUsed.get());
    version = original.version;
    merging = true;
  }
    
  MemoryDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
    maxDoc = state.segmentInfo.maxDoc();
    merging = false;
    String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
    boolean success = false;
    try {
      version = CodecUtil.checkIndexHeader(in, metaCodec, VERSION_START, VERSION_CURRENT,
                                                 state.segmentInfo.getId(), state.segmentSuffix);
      numEntries = readFields(in, state.fieldInfos);
      CodecUtil.checkFooter(in);
      ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }

    String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
    this.data = state.directory.openInput(dataName, state.context);
    success = false;
    try {
      final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, VERSION_START, VERSION_CURRENT,
                                                              state.segmentInfo.getId(), state.segmentSuffix);
      if (version != version2) {
        throw new CorruptIndexException("Format versions mismatch: meta=" + version + ", data=" + version2, data);
      }
      
      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(data);

      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(this.data);
      }
    }
  }
  
  private NumericEntry readNumericEntry(IndexInput meta) throws IOException {
    NumericEntry entry = new NumericEntry();
    entry.offset = meta.readLong();
    entry.missingOffset = meta.readLong();
    if (entry.missingOffset != -1) {
      entry.missingBytes = meta.readLong();
    } else {
      entry.missingBytes = 0;
    }
    entry.format = meta.readByte();
    switch(entry.format) {
      case DELTA_COMPRESSED:
      case TABLE_COMPRESSED:
      case BLOCK_COMPRESSED:
      case GCD_COMPRESSED:
           break;
      default:
           throw new CorruptIndexException("Unknown format: " + entry.format, meta);
    }
    entry.packedIntsVersion = meta.readVInt();
    entry.count = meta.readLong();
    return entry;
  }
  
  private BinaryEntry readBinaryEntry(IndexInput meta) throws IOException {
    BinaryEntry entry = new BinaryEntry();
    entry.offset = meta.readLong();
    entry.numBytes = meta.readLong();
    entry.missingOffset = meta.readLong();
    if (entry.missingOffset != -1) {
      entry.missingBytes = meta.readLong();
    } else {
      entry.missingBytes = 0;
    }
    entry.minLength = meta.readVInt();
    entry.maxLength = meta.readVInt();
    if (entry.minLength != entry.maxLength) {
      entry.packedIntsVersion = meta.readVInt();
      entry.blockSize = meta.readVInt();
    }
    return entry;
  }
  
  private FSTEntry readFSTEntry(IndexInput meta) throws IOException {
    FSTEntry entry = new FSTEntry();
    entry.offset = meta.readLong();
    entry.numOrds = meta.readVLong();
    return entry;
  }
  
  private int readFields(IndexInput meta, FieldInfos infos) throws IOException {
    int numEntries = 0;
    int fieldNumber = meta.readVInt();
    while (fieldNumber != -1) {
      numEntries++;
      FieldInfo info = infos.fieldInfo(fieldNumber);
      if (info == null) {
        throw new CorruptIndexException("invalid field number: " + fieldNumber, meta);
      }
      int fieldType = meta.readByte();
      if (fieldType == NUMBER) {
        numerics.put(info.name, readNumericEntry(meta));
      } else if (fieldType == BYTES) {
        binaries.put(info.name, readBinaryEntry(meta));
      } else if (fieldType == FST) {
        fsts.put(info.name,readFSTEntry(meta));
      } else if (fieldType == SORTED_SET) {
        SortedSetEntry entry = new SortedSetEntry();
        entry.singleton = false;
        sortedSets.put(info.name, entry);
      } else if (fieldType == SORTED_SET_SINGLETON) {
        SortedSetEntry entry = new SortedSetEntry();
        entry.singleton = true;
        sortedSets.put(info.name, entry);
      } else if (fieldType == SORTED_NUMERIC) {
        SortedNumericEntry entry = new SortedNumericEntry();
        entry.singleton = false;
        entry.packedIntsVersion = meta.readVInt();
        entry.blockSize = meta.readVInt();
        entry.addressOffset = meta.readLong();
        entry.valueCount = meta.readLong();
        sortedNumerics.put(info.name, entry);
      } else if (fieldType == SORTED_NUMERIC_SINGLETON) {
        SortedNumericEntry entry = new SortedNumericEntry();
        entry.singleton = true;
        sortedNumerics.put(info.name, entry);
      } else {
        throw new CorruptIndexException("invalid entry type: " + fieldType + ", fieldName=" + info.name, meta);
      }
      fieldNumber = meta.readVInt();
    }
    return numEntries;
  }

  @Override
  public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
    LegacyNumericDocValues instance = numericInstances.get(field.name);
    if (instance == null) {
      instance = loadNumeric(field);
      if (!merging) {
        numericInstances.put(field.name, instance);
      }
    }
    NumericEntry ne = numerics.get(field.name);
    return new LegacyNumericDocValuesWrapper(getMissingBits(field, ne.missingOffset, ne.missingBytes), instance);
  }

  private synchronized LegacyNumericDocValues getNumericNonIterator(FieldInfo field) throws IOException {
    LegacyNumericDocValues instance = numericInstances.get(field.name);
    if (instance == null) {
      instance = loadNumeric(field);
      if (!merging) {
        numericInstances.put(field.name, instance);
      }
    }
    return instance;
  }
  
  @Override
  public long ramBytesUsed() {
    return ramBytesUsed.get();
  }
  
  @Override
  public synchronized Collection<Accountable> getChildResources() {
    List<Accountable> resources = new ArrayList<>();
    resources.addAll(Accountables.namedAccountables("numeric field", numericInfo));
    resources.addAll(Accountables.namedAccountables("pagedbytes field", pagedBytesInstances));
    resources.addAll(Accountables.namedAccountables("term dict field", fstInstances));
    resources.addAll(Accountables.namedAccountables("missing bitset field", docsWithFieldInstances));
    resources.addAll(Accountables.namedAccountables("addresses field", addresses));
    return Collections.unmodifiableList(resources);
  }

  @Override
  public void checkIntegrity() throws IOException {
    CodecUtil.checksumEntireFile(data.clone());
  }
  
  @Override
  public synchronized DocValuesProducer getMergeInstance() throws IOException {
    return new MemoryDocValuesProducer(this);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(entries=" + numEntries + ")";
  }

  private LegacyNumericDocValues loadNumeric(FieldInfo field) throws IOException {
    NumericEntry entry = numerics.get(field.name);
    IndexInput data = this.data.clone();
    data.seek(entry.offset + entry.missingBytes);
    switch (entry.format) {
      case TABLE_COMPRESSED:
        int size = data.readVInt();
        if (size > 256) {
          throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, got=" + size, data);
        }
        final long decode[] = new long[size];
        for (int i = 0; i < decode.length; i++) {
          decode[i] = data.readLong();
        }
        final int formatID = data.readVInt();
        final int bitsPerValue = data.readVInt();
        final PackedInts.Reader ordsReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID), entry.packedIntsVersion, (int)entry.count, bitsPerValue);
        if (!merging) {
          ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(decode) + ordsReader.ramBytesUsed());
          numericInfo.put(field.name, Accountables.namedAccountable("table compressed", ordsReader));
        }
        return new LegacyNumericDocValues() {
          @Override
          public long get(int docID) {
            return decode[(int)ordsReader.get(docID)];
          }
        };
      case DELTA_COMPRESSED:
        final long minDelta = data.readLong();
        final int formatIDDelta = data.readVInt();
        final int bitsPerValueDelta = data.readVInt();
        final PackedInts.Reader deltaReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatIDDelta), entry.packedIntsVersion, (int)entry.count, bitsPerValueDelta);
        if (!merging) {
          ramBytesUsed.addAndGet(deltaReader.ramBytesUsed());
          numericInfo.put(field.name, Accountables.namedAccountable("delta compressed", deltaReader));
        }
        return new LegacyNumericDocValues() {
          @Override
          public long get(int docID) {
            return minDelta + deltaReader.get(docID);
          }
        };
      case BLOCK_COMPRESSED:
        final int blockSize = data.readVInt();
        final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, entry.count, false);
        if (!merging) {
          ramBytesUsed.addAndGet(reader.ramBytesUsed());
          numericInfo.put(field.name, Accountables.namedAccountable("block compressed", reader));
        }
        return new LegacyNumericDocValues() {
          @Override
          public long get(int docID) {
            return reader.get(docID);
          }
        };
      case GCD_COMPRESSED:
        final long min = data.readLong();
        final long mult = data.readLong();
        final int formatIDGCD = data.readVInt();
        final int bitsPerValueGCD = data.readVInt();
        final PackedInts.Reader quotientReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatIDGCD), entry.packedIntsVersion, (int)entry.count, bitsPerValueGCD);
        if (!merging) {
          ramBytesUsed.addAndGet(quotientReader.ramBytesUsed());
          numericInfo.put(field.name, Accountables.namedAccountable("gcd compressed", quotientReader));
        }
        return new LegacyNumericDocValues() {
          @Override
          public long get(int docID) {
            return min + mult * quotientReader.get(docID);
          }
        };
      default:
        throw new AssertionError();
    }
  }

  private LegacyBinaryDocValues getLegacyBinary(FieldInfo field) throws IOException {
    BinaryEntry entry = binaries.get(field.name);

    BytesAndAddresses instance;
    synchronized (this) {
      instance = pagedBytesInstances.get(field.name);
      if (instance == null) {
        instance = loadBinary(field);
        if (!merging) {
          pagedBytesInstances.put(field.name, instance);
        }
      }
    }
    final PagedBytes.Reader bytesReader = instance.reader;
    final MonotonicBlockPackedReader addresses = instance.addresses;

    if (addresses == null) {
      assert entry.minLength == entry.maxLength;
      final int fixedLength = entry.minLength;
      return new LegacyBinaryDocValues() {
        final BytesRef term = new BytesRef();

        @Override
        public BytesRef get(int docID) {
          bytesReader.fillSlice(term, fixedLength * (long)docID, fixedLength);
          return term;
        }
      };
    } else {
      return new LegacyBinaryDocValues() {
        final BytesRef term = new BytesRef();

        @Override
        public BytesRef get(int docID) {
          long startAddress = docID == 0 ? 0 : addresses.get(docID-1);
          long endAddress = addresses.get(docID);
          bytesReader.fillSlice(term, startAddress, (int) (endAddress - startAddress));
          return term;
        }
      };
    }
  }
  
  @Override
  public synchronized BinaryDocValues getBinary(FieldInfo field) throws IOException {
    BinaryEntry be = binaries.get(field.name);
    return new LegacyBinaryDocValuesWrapper(getMissingBits(field, be.missingOffset, be.missingBytes), getLegacyBinary(field));
  }

  private BytesAndAddresses loadBinary(FieldInfo field) throws IOException {
    BytesAndAddresses bytesAndAddresses = new BytesAndAddresses();
    BinaryEntry entry = binaries.get(field.name);
    IndexInput data = this.data.clone();
    data.seek(entry.offset);
    PagedBytes bytes = new PagedBytes(16);
    bytes.copy(data, entry.numBytes);
    bytesAndAddresses.reader = bytes.freeze(true);
    if (!merging) {
      ramBytesUsed.addAndGet(bytesAndAddresses.reader.ramBytesUsed());
    }
    if (entry.minLength != entry.maxLength) {
      data.seek(data.getFilePointer() + entry.missingBytes);
      bytesAndAddresses.addresses = MonotonicBlockPackedReader.of(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false);
      if (!merging) {
        ramBytesUsed.addAndGet(bytesAndAddresses.addresses.ramBytesUsed());
      }
    }
    return bytesAndAddresses;
  }
  
  @Override
  public SortedDocValues getSorted(FieldInfo field) throws IOException {
    return new LegacySortedDocValuesWrapper(getSortedNonIterator(field), maxDoc);
  }
  
  private LegacySortedDocValues getSortedNonIterator(FieldInfo field) throws IOException {
    final FSTEntry entry = fsts.get(field.name);
    if (entry.numOrds == 0) {
      return DocValues.emptyLegacySorted();
    }
    FST<Long> instance;
    synchronized(this) {
      instance = fstInstances.get(field.name);
      if (instance == null) {
        IndexInput data = this.data.clone();
        data.seek(entry.offset);
        instance = new FST<>(data, PositiveIntOutputs.getSingleton());
        if (!merging) {
          ramBytesUsed.addAndGet(instance.ramBytesUsed());
          fstInstances.put(field.name, instance);
        }
      }
    }
    final LegacyNumericDocValues docToOrd = getNumericNonIterator(field);
    final FST<Long> fst = instance;
    
    // per-thread resources
    final BytesReader in = fst.getBytesReader();
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
    
    return new LegacySortedDocValues() {
      final BytesRefBuilder term = new BytesRefBuilder();

      @Override
      public int getOrd(int docID) {
        return (int) docToOrd.get(docID);
      }

      @Override
      public BytesRef lookupOrd(int ord) {
        try {
          in.setPosition(0);
          fst.getFirstArc(firstArc);
          IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
          return Util.toBytesRef(output, term);
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public int lookupTerm(BytesRef key) {
        try {
          InputOutput<Long> o = fstEnum.seekCeil(key);
          if (o == null) {
            return -getValueCount()-1;
          } else if (o.input.equals(key)) {
            return o.output.intValue();
          } else {
            return (int) -o.output-1;
          }
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public int getValueCount() {
        return (int)entry.numOrds;
      }

      @Override
      public TermsEnum termsEnum() {
        return new FSTTermsEnum(fst);
      }
    };
  }
  
  @Override
  public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
    SortedNumericEntry entry = sortedNumerics.get(field.name);
    if (entry.singleton) {
      LegacyNumericDocValues values = getNumericNonIterator(field);
      NumericEntry ne = numerics.get(field.name);
      Bits docsWithField = getMissingBits(field, ne.missingOffset, ne.missingBytes);
      return DocValues.singleton(new LegacyNumericDocValuesWrapper(docsWithField, values));
    } else {
      final LegacyNumericDocValues values = getNumericNonIterator(field);
      final MonotonicBlockPackedReader addr;
      synchronized (this) {
        MonotonicBlockPackedReader res = addresses.get(field.name);
        if (res == null) {
          IndexInput data = this.data.clone();
          data.seek(entry.addressOffset);
          res = MonotonicBlockPackedReader.of(data, entry.packedIntsVersion, entry.blockSize, entry.valueCount, false);
          if (!merging) {
            addresses.put(field.name, res);
            ramBytesUsed.addAndGet(res.ramBytesUsed());
          }
        }
        addr = res;
      }
      return new LegacySortedNumericDocValuesWrapper(new LegacySortedNumericDocValues() {
        int startOffset;
        int endOffset;
      
        @Override
        public void setDocument(int doc) {
          startOffset = (int) addr.get(doc);
          endOffset = (int) addr.get(doc+1);
        }

        @Override
        public long valueAt(int index) {
          return values.get(startOffset + index);
        }

        @Override
        public int count() {
          return (endOffset - startOffset);
        }
        }, maxDoc);
    }
  }
  
  @Override
  public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
    SortedSetEntry sortedSetEntry = sortedSets.get(field.name);
    if (sortedSetEntry.singleton) {
      return DocValues.singleton(getSorted(field));
    }
    
    final FSTEntry entry = fsts.get(field.name);
    if (entry.numOrds == 0) {
      return DocValues.emptySortedSet(); // empty FST!
    }
    FST<Long> instance;
    synchronized(this) {
      instance = fstInstances.get(field.name);
      if (instance == null) {
        IndexInput data = this.data.clone();
        data.seek(entry.offset);
        instance = new FST<>(data, PositiveIntOutputs.getSingleton());
        if (!merging) {
          ramBytesUsed.addAndGet(instance.ramBytesUsed());
          fstInstances.put(field.name, instance);
        }
      }
    }
    final LegacyBinaryDocValues docToOrds = getLegacyBinary(field);
    final FST<Long> fst = instance;
    
    // per-thread resources
    final BytesReader in = fst.getBytesReader();
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
    final ByteArrayDataInput input = new ByteArrayDataInput();
    return new LegacySortedSetDocValuesWrapper(new LegacySortedSetDocValues() {
      final BytesRefBuilder term = new BytesRefBuilder();
      BytesRef ref;
      long currentOrd;

      @Override
      public long nextOrd() {
        if (input.eof()) {
          return NO_MORE_ORDS;
        } else {
          currentOrd += input.readVLong();
          return currentOrd;
        }
      }
      
      @Override
      public void setDocument(int docID) {
        ref = docToOrds.get(docID);
        input.reset(ref.bytes, ref.offset, ref.length);
        currentOrd = 0;
      }

      @Override
      public BytesRef lookupOrd(long ord) {
        try {
          in.setPosition(0);
          fst.getFirstArc(firstArc);
          IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
          return Util.toBytesRef(output, term);
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public long lookupTerm(BytesRef key) {
        try {
          InputOutput<Long> o = fstEnum.seekCeil(key);
          if (o == null) {
            return -getValueCount()-1;
          } else if (o.input.equals(key)) {
            return o.output.intValue();
          } else {
            return -o.output-1;
          }
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public long getValueCount() {
        return entry.numOrds;
      }

      @Override
      public TermsEnum termsEnum() {
        return new FSTTermsEnum(fst);
      }
      }, maxDoc);
  }
  
  private Bits getMissingBits(FieldInfo field, final long offset, final long length) throws IOException {
    if (offset == -1) {
      return new Bits.MatchAllBits(maxDoc);
    } else {
      FixedBitSet instance;
      synchronized(this) {
        instance = docsWithFieldInstances.get(field.name);
        if (instance == null) {
          IndexInput data = this.data.clone();
          data.seek(offset);
          assert length % 8 == 0;
          long bits[] = new long[(int) length >> 3];
          for (int i = 0; i < bits.length; i++) {
            bits[i] = data.readLong();
          }
          instance = new FixedBitSet(bits, maxDoc);
          if (!merging) {
            docsWithFieldInstances.put(field.name, instance);
            ramBytesUsed.addAndGet(instance.ramBytesUsed());
          }
        }
      }
      return instance;
    }
  }
  
  @Override
  public void close() throws IOException {
    data.close();
  }
  
  static class NumericEntry {
    long offset;
    long count;
    long missingOffset;
    long missingBytes;
    byte format;
    int packedIntsVersion;
  }
  
  static class BinaryEntry {
    long offset;
    long missingOffset;
    long missingBytes;
    long numBytes;
    int minLength;
    int maxLength;
    int packedIntsVersion;
    int blockSize;
  }
  
  static class FSTEntry {
    long offset;
    long numOrds;
  }
  
  static class SortedSetEntry {
    boolean singleton;
  }
  
  static class SortedNumericEntry {
    boolean singleton;
    long addressOffset;
    int packedIntsVersion;
    int blockSize;
    long valueCount;
  }

  static class BytesAndAddresses implements Accountable {
    PagedBytes.Reader reader;
    MonotonicBlockPackedReader addresses;
    
    @Override
    public long ramBytesUsed() {
      long bytesUsed = reader.ramBytesUsed();
      if (addresses != null) {
        bytesUsed += addresses.ramBytesUsed();
      }
      return bytesUsed;
    }
    
    @Override
    public Collection<Accountable> getChildResources() {
      List<Accountable> resources = new ArrayList<>();
      if (addresses != null) {
        resources.add(Accountables.namedAccountable("addresses", addresses));
      }
      resources.add(Accountables.namedAccountable("term bytes", reader));
      return Collections.unmodifiableList(resources);
    }
  }

  // exposes FSTEnum directly as a TermsEnum: avoids binary-search next()
  static class FSTTermsEnum extends TermsEnum {
    final BytesRefFSTEnum<Long> in;
    
    // this is all for the complicated seek(ord)...
    // maybe we should add a FSTEnum that supports this operation?
    final FST<Long> fst;
    final FST.BytesReader bytesReader;
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefBuilder scratchBytes = new BytesRefBuilder();
    
    FSTTermsEnum(FST<Long> fst) {
      this.fst = fst;
      in = new BytesRefFSTEnum<>(fst);
      bytesReader = fst.getBytesReader();
    }

    @Override
    public BytesRef next() throws IOException {
      InputOutput<Long> io = in.next();
      if (io == null) {
        return null;
      } else {
        return io.input;
      }
    }

    @Override
    public SeekStatus seekCeil(BytesRef text) throws IOException {
      if (in.seekCeil(text) == null) {
        return SeekStatus.END;
      } else if (term().equals(text)) {
        // TODO: add SeekStatus to FSTEnum like in https://issues.apache.org/jira/browse/LUCENE-3729
        // to remove this comparision?
        return SeekStatus.FOUND;
      } else {
        return SeekStatus.NOT_FOUND;
      }
    }

    @Override
    public boolean seekExact(BytesRef text) throws IOException {
      if (in.seekExact(text) == null) {
        return false;
      } else {
        return true;
      }
    }

    @Override
    public void seekExact(long ord) throws IOException {
      // TODO: would be better to make this simpler and faster.
      // but we dont want to introduce a bug that corrupts our enum state!
      bytesReader.setPosition(0);
      fst.getFirstArc(firstArc);
      IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
      // TODO: we could do this lazily, better to try to push into FSTEnum though?
      in.seekExact(Util.toBytesRef(output, new BytesRefBuilder()));
    }

    @Override
    public BytesRef term() throws IOException {
      return in.current().input;
    }

    @Override
    public long ord() throws IOException {
      return in.current().output;
    }

    @Override
    public int docFreq() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public long totalTermFreq() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
      throw new UnsupportedOperationException();
    }

  }
}