/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
/**
* A wrapper for CompositeIndexReader providing access to DocValues.
*
* <p><b>NOTE</b>: for multi readers, you'll get better
* performance by gathering the sub readers using
* {@link IndexReader#getContext()} to get the
* atomic leaves and then operate per-LeafReader,
* instead of using this class.
*
* <p><b>NOTE</b>: This is very costly.
*
* @lucene.experimental
* @lucene.internal
*/
public class MultiDocValues {
/** No instantiation: this class only exposes static utility methods. */
private MultiDocValues() {}
/** Returns a NumericDocValues for a reader's norms (potentially merging on-the-fly).
 * <p>
 * This is a slow way to access normalization values. Instead, access them per-segment
 * with {@link LeafReader#getNormValues(String)}
 * </p>
 */
public static NumericDocValues getNormValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  if (size == 0) {
    return null;
  } else if (size == 1) {
    // single segment: no merging required
    return leaves.get(0).reader().getNormValues(field);
  }
  FieldInfo fi = MultiFields.getMergedFieldInfos(r).fieldInfo(field);
  if (fi == null || fi.hasNorms() == false) {
    return null;
  }
  // Forward-only cursor over the leaves: opens each leaf's norms lazily and maps
  // per-leaf docIDs into the composite docID space by adding the leaf's docBase.
  return new NumericDocValues() {
    private int nextLeaf;                   // index of the next leaf to open
    private NumericDocValues currentValues; // norms of the current leaf; null when exhausted
    private LeafReaderContext currentLeaf;  // context of the current leaf
    private int docID = -1;                 // current docID in the composite space

    @Override
    public int nextDoc() throws IOException {
      while (true) {
        if (currentValues == null) {
          if (nextLeaf == leaves.size()) {
            docID = NO_MORE_DOCS;
            return docID;
          }
          currentLeaf = leaves.get(nextLeaf);
          // may be null if this leaf has no norms for the field; the next
          // iteration then moves on to the following leaf
          currentValues = currentLeaf.reader().getNormValues(field);
          nextLeaf++;
          continue;
        }
        int newDocID = currentValues.nextDoc();
        if (newDocID == NO_MORE_DOCS) {
          currentValues = null;
          continue;
        } else {
          docID = currentLeaf.docBase + newDocID;
          return docID;
        }
      }
    }

    @Override
    public int docID() {
      return docID;
    }

    @Override
    public int advance(int targetDocID) throws IOException {
      if (targetDocID <= docID) {
        throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
      }
      int readerIndex = ReaderUtil.subIndex(targetDocID, leaves);
      if (readerIndex >= nextLeaf) {
        if (readerIndex == leaves.size()) {
          currentValues = null;
          docID = NO_MORE_DOCS;
          return docID;
        }
        currentLeaf = leaves.get(readerIndex);
        currentValues = currentLeaf.reader().getNormValues(field);
        // BUGFIX: advance the cursor *before* possibly delegating to nextDoc().
        // Previously nextLeaf was only updated after the null check, so when the
        // target leaf had no norms, nextDoc() restarted from a stale (earlier)
        // leaf and could return a decreasing docID. This now matches the sibling
        // implementations in getNumericValues/getBinaryValues.
        nextLeaf = readerIndex+1;
        if (currentValues == null) {
          return nextDoc();
        }
      }
      int newDocID = currentValues.advance(targetDocID - currentLeaf.docBase);
      if (newDocID == NO_MORE_DOCS) {
        currentValues = null;
        return nextDoc();
      } else {
        docID = currentLeaf.docBase + newDocID;
        return docID;
      }
    }

    @Override
    public boolean advanceExact(int targetDocID) throws IOException {
      if (targetDocID < docID) {
        throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
      }
      int readerIndex = ReaderUtil.subIndex(targetDocID, leaves);
      if (readerIndex >= nextLeaf) {
        if (readerIndex == leaves.size()) {
          throw new IllegalArgumentException("Out of range: " + targetDocID);
        }
        currentLeaf = leaves.get(readerIndex);
        currentValues = currentLeaf.reader().getNormValues(field);
        nextLeaf = readerIndex+1;
      }
      docID = targetDocID;
      if (currentValues == null) {
        // this leaf has no norms for the field
        return false;
      }
      return currentValues.advanceExact(targetDocID - currentLeaf.docBase);
    }

    @Override
    public long longValue() throws IOException {
      return currentValues.longValue();
    }

    @Override
    public long cost() {
      // TODO: sum the per-leaf costs instead of returning 0
      return 0;
    }
  };
}
/** Returns a NumericDocValues for a reader's docvalues (potentially merging on-the-fly).
 * <p>
 * This is a slow way to access numeric values. Instead, access them per-segment
 * with {@link LeafReader#getNumericDocValues(String)}.
 * </p>
 */
public static NumericDocValues getNumericValues(final IndexReader r, final String field) throws IOException {
final List<LeafReaderContext> leaves = r.leaves();
final int size = leaves.size();
if (size == 0) {
return null;
} else if (size == 1) {
// single segment: expose its values directly, no merging required
return leaves.get(0).reader().getNumericDocValues(field);
}
// only merge if at least one segment actually indexed NUMERIC doc values for this field
boolean anyReal = false;
for(LeafReaderContext leaf : leaves) {
FieldInfo fieldInfo = leaf.reader().getFieldInfos().fieldInfo(field);
if (fieldInfo != null) {
DocValuesType dvType = fieldInfo.getDocValuesType();
if (dvType == DocValuesType.NUMERIC) {
anyReal = true;
break;
}
}
}
if (anyReal == false) {
return null;
}
// Forward-only cursor over the leaves: opens each leaf's values lazily and maps
// per-leaf docIDs into the composite docID space by adding the leaf's docBase.
return new NumericDocValues() {
private int nextLeaf; // index of the next leaf to open
private NumericDocValues currentValues; // null when the current leaf is exhausted
private LeafReaderContext currentLeaf; // context of the current leaf
private int docID = -1; // current docID in the composite space
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() throws IOException {
while (true) {
// skip forward past leaves that have no values for this field
while (currentValues == null) {
if (nextLeaf == leaves.size()) {
docID = NO_MORE_DOCS;
return docID;
}
currentLeaf = leaves.get(nextLeaf);
currentValues = currentLeaf.reader().getNumericDocValues(field);
nextLeaf++;
}
int newDocID = currentValues.nextDoc();
if (newDocID == NO_MORE_DOCS) {
currentValues = null;
continue;
} else {
docID = currentLeaf.docBase + newDocID;
return docID;
}
}
}
@Override
public int advance(int targetDocID) throws IOException {
if (targetDocID <= docID) {
throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
}
int readerIndex = ReaderUtil.subIndex(targetDocID, leaves);
if (readerIndex >= nextLeaf) {
// the target lives in a later leaf: jump the cursor there
if (readerIndex == leaves.size()) {
currentValues = null;
docID = NO_MORE_DOCS;
return docID;
}
currentLeaf = leaves.get(readerIndex);
currentValues = currentLeaf.reader().getNumericDocValues(field);
nextLeaf = readerIndex+1;
if (currentValues == null) {
// target leaf has no values: fall through to the next leaf that does
return nextDoc();
}
}
int newDocID = currentValues.advance(targetDocID - currentLeaf.docBase);
if (newDocID == NO_MORE_DOCS) {
currentValues = null;
return nextDoc();
} else {
docID = currentLeaf.docBase + newDocID;
return docID;
}
}
@Override
public boolean advanceExact(int targetDocID) throws IOException {
if (targetDocID < docID) {
throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
}
int readerIndex = ReaderUtil.subIndex(targetDocID, leaves);
if (readerIndex >= nextLeaf) {
if (readerIndex == leaves.size()) {
throw new IllegalArgumentException("Out of range: " + targetDocID);
}
currentLeaf = leaves.get(readerIndex);
currentValues = currentLeaf.reader().getNumericDocValues(field);
nextLeaf = readerIndex+1;
}
docID = targetDocID;
if (currentValues == null) {
// this leaf has no values for the field
return false;
}
return currentValues.advanceExact(targetDocID - currentLeaf.docBase);
}
@Override
public long longValue() throws IOException {
return currentValues.longValue();
}
@Override
public long cost() {
// TODO: sum the per-leaf costs instead of returning 0
return 0;
}
};
}
/** Returns a BinaryDocValues for a reader's docvalues (potentially merging on-the-fly) */
public static BinaryDocValues getBinaryValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  if (size == 0) {
    return null;
  }
  if (size == 1) {
    // single segment: nothing to merge
    return leaves.get(0).reader().getBinaryDocValues(field);
  }

  // Only merge if at least one leaf really has BINARY doc values for this field.
  boolean found = false;
  for (LeafReaderContext ctx : leaves) {
    FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(field);
    if (info != null && info.getDocValuesType() == DocValuesType.BINARY) {
      found = true;
      break;
    }
  }
  if (!found) {
    return null;
  }

  // Forward-only cursor over the leaves; per-leaf docIDs are translated into the
  // composite docID space by adding the leaf's docBase.
  return new BinaryDocValues() {
    private int upcomingLeaf;           // next leaf to open
    private BinaryDocValues leafValues; // values of the leaf we are positioned on; null when exhausted
    private LeafReaderContext leaf;     // context of that leaf
    private int doc = -1;               // current composite docID

    @Override
    public int nextDoc() throws IOException {
      for (;;) {
        // skip forward past leaves without values for this field
        while (leafValues == null) {
          if (upcomingLeaf == leaves.size()) {
            doc = NO_MORE_DOCS;
            return doc;
          }
          leaf = leaves.get(upcomingLeaf);
          leafValues = leaf.reader().getBinaryDocValues(field);
          upcomingLeaf++;
        }
        int sub = leafValues.nextDoc();
        if (sub == NO_MORE_DOCS) {
          leafValues = null;
        } else {
          doc = leaf.docBase + sub;
          return doc;
        }
      }
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int advance(int targetDocID) throws IOException {
      if (targetDocID <= doc) {
        throw new IllegalArgumentException("can only advance beyond current document: on docID=" + doc + " but targetDocID=" + targetDocID);
      }
      int readerIndex = ReaderUtil.subIndex(targetDocID, leaves);
      if (readerIndex >= upcomingLeaf) {
        // target lives in a later leaf: jump the cursor there
        if (readerIndex == leaves.size()) {
          leafValues = null;
          doc = NO_MORE_DOCS;
          return doc;
        }
        leaf = leaves.get(readerIndex);
        leafValues = leaf.reader().getBinaryDocValues(field);
        upcomingLeaf = readerIndex + 1;
        if (leafValues == null) {
          // target leaf has no values: continue with the next leaf that does
          return nextDoc();
        }
      }
      int sub = leafValues.advance(targetDocID - leaf.docBase);
      if (sub == NO_MORE_DOCS) {
        leafValues = null;
        return nextDoc();
      }
      doc = leaf.docBase + sub;
      return doc;
    }

    @Override
    public boolean advanceExact(int targetDocID) throws IOException {
      if (targetDocID < doc) {
        throw new IllegalArgumentException("can only advance beyond current document: on docID=" + doc + " but targetDocID=" + targetDocID);
      }
      int readerIndex = ReaderUtil.subIndex(targetDocID, leaves);
      if (readerIndex >= upcomingLeaf) {
        if (readerIndex == leaves.size()) {
          throw new IllegalArgumentException("Out of range: " + targetDocID);
        }
        leaf = leaves.get(readerIndex);
        leafValues = leaf.reader().getBinaryDocValues(field);
        upcomingLeaf = readerIndex + 1;
      }
      doc = targetDocID;
      if (leafValues == null) {
        return false;
      }
      return leafValues.advanceExact(targetDocID - leaf.docBase);
    }

    @Override
    public BytesRef binaryValue() throws IOException {
      return leafValues.binaryValue();
    }

    @Override
    public long cost() {
      // TODO
      return 0;
    }
  };
}
/** Returns a SortedNumericDocValues for a reader's docvalues (potentially merging on-the-fly)
 * <p>
 * This is a slow way to access sorted numeric values. Instead, access them per-segment
 * with {@link LeafReader#getSortedNumericDocValues(String)}
 * </p>
 * */
public static SortedNumericDocValues getSortedNumericValues(final IndexReader r, final String field) throws IOException {
final List<LeafReaderContext> leaves = r.leaves();
final int size = leaves.size();
if (size == 0) {
return null;
} else if (size == 1) {
// single segment: expose its values directly
return leaves.get(0).reader().getSortedNumericDocValues(field);
}
boolean anyReal = false;
final SortedNumericDocValues[] values = new SortedNumericDocValues[size];
// NOTE(review): `starts` is populated but never read by the returned instance
// (leaf docBases are taken from `leaves` instead) -- looks like dead code; confirm.
final int[] starts = new int[size+1];
long totalCost = 0;
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
SortedNumericDocValues v = context.reader().getSortedNumericDocValues(field);
if (v == null) {
// substitute an empty instance so every slot of `values` is non-null
v = DocValues.emptySortedNumeric(context.reader().maxDoc());
} else {
anyReal = true;
}
values[i] = v;
starts[i] = context.docBase;
// NOTE(review): unlike getSortedValues/getSortedSetValues, the cost of empty
// substitutes is included here -- confirm whether that asymmetry is intended.
totalCost += v.cost();
}
starts[size] = r.maxDoc();
if (anyReal == false) {
// no segment has SORTED_NUMERIC doc values for this field
return null;
}
final long finalTotalCost = totalCost;
// Forward-only cursor over the per-leaf values; per-leaf docIDs are mapped into
// the composite docID space by adding the leaf's docBase.
return new SortedNumericDocValues() {
private int nextLeaf; // index of the next leaf to open
private SortedNumericDocValues currentValues; // null when the current leaf is exhausted
private LeafReaderContext currentLeaf; // context of the current leaf
private int docID = -1; // current docID in the composite space
@Override
public int nextDoc() throws IOException {
while (true) {
if (currentValues == null) {
if (nextLeaf == leaves.size()) {
docID = NO_MORE_DOCS;
return docID;
}
currentLeaf = leaves.get(nextLeaf);
currentValues = values[nextLeaf];
nextLeaf++;
}
int newDocID = currentValues.nextDoc();
if (newDocID == NO_MORE_DOCS) {
currentValues = null;
continue;
} else {
docID = currentLeaf.docBase + newDocID;
return docID;
}
}
}
@Override
public int docID() {
return docID;
}
@Override
public int advance(int targetDocID) throws IOException {
if (targetDocID <= docID) {
throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
}
int readerIndex = ReaderUtil.subIndex(targetDocID, leaves);
if (readerIndex >= nextLeaf) {
// the target lives in a later leaf: jump the cursor there
if (readerIndex == leaves.size()) {
currentValues = null;
docID = NO_MORE_DOCS;
return docID;
}
currentLeaf = leaves.get(readerIndex);
currentValues = values[readerIndex];
nextLeaf = readerIndex+1;
}
int newDocID = currentValues.advance(targetDocID - currentLeaf.docBase);
if (newDocID == NO_MORE_DOCS) {
// current leaf exhausted: continue in the following leaves
currentValues = null;
return nextDoc();
} else {
docID = currentLeaf.docBase + newDocID;
return docID;
}
}
@Override
public boolean advanceExact(int targetDocID) throws IOException {
if (targetDocID < docID) {
throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
}
int readerIndex = ReaderUtil.subIndex(targetDocID, leaves);
if (readerIndex >= nextLeaf) {
if (readerIndex == leaves.size()) {
throw new IllegalArgumentException("Out of range: " + targetDocID);
}
currentLeaf = leaves.get(readerIndex);
currentValues = values[readerIndex];
nextLeaf = readerIndex+1;
}
docID = targetDocID;
if (currentValues == null) {
return false;
}
return currentValues.advanceExact(targetDocID - currentLeaf.docBase);
}
@Override
public long cost() {
return finalTotalCost;
}
@Override
public int docValueCount() {
// number of values for the document the cursor is positioned on
return currentValues.docValueCount();
}
@Override
public long nextValue() throws IOException {
return currentValues.nextValue();
}
};
}
/** Returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedDocValues(String)}
 * </p>
 */
public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  if (size == 0) {
    return null;
  }
  if (size == 1) {
    // single segment: nothing to merge
    return leaves.get(0).reader().getSortedDocValues(field);
  }

  final SortedDocValues[] values = new SortedDocValues[size];
  final int[] starts = new int[size + 1];
  boolean anyReal = false;
  long totalCost = 0;
  for (int i = 0; i < size; i++) {
    LeafReaderContext context = leaves.get(i);
    SortedDocValues v = context.reader().getSortedDocValues(field);
    if (v == null) {
      // substitute an empty instance so every segment participates in the ordinal map
      v = DocValues.emptySorted();
    } else {
      anyReal = true;
      totalCost += v.cost();
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();

  if (anyReal == false) {
    // no segment has SORTED doc values for this field
    return null;
  }
  // build a global ordinal mapping, cached against the top-level reader when possible
  IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
  IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
  OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
  return new MultiSortedDocValues(values, starts, mapping, totalCost);
}
/** Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedSetDocValues(String)}
 * </p>
 */
public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  if (size == 0) {
    return null;
  }
  if (size == 1) {
    // single segment: nothing to merge
    return leaves.get(0).reader().getSortedSetDocValues(field);
  }

  final SortedSetDocValues[] values = new SortedSetDocValues[size];
  final int[] starts = new int[size + 1];
  boolean anyReal = false;
  long totalCost = 0;
  for (int i = 0; i < size; i++) {
    LeafReaderContext context = leaves.get(i);
    SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
    if (v == null) {
      // substitute an empty instance so every segment participates in the ordinal map
      v = DocValues.emptySortedSet();
    } else {
      anyReal = true;
      totalCost += v.cost();
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();

  if (anyReal == false) {
    // no segment has SORTED_SET doc values for this field
    return null;
  }
  // build the global ordinal mapping, cached against the top-level reader when possible
  IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
  IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
  OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
  return new MultiSortedSetDocValues(values, starts, mapping, totalCost);
}
/** maps per-segment ordinals to/from global ordinal space */
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
// TODO: use more efficient packed ints structures?
// TODO: pull this out? it's pretty generic (maps between N ord()-enabled TermsEnums)
public static class OrdinalMap implements Accountable {
/** Bidirectional mapping between the caller's segment numbering and an internal
 *  numbering where segments are sorted by decreasing weight. */
private static class SegmentMap implements Accountable {
  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);

  /** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
  private static int[] map(final long[] weights) {
    final int[] sortedToOrig = new int[weights.length];
    for (int k = 0; k < sortedToOrig.length; ++k) {
      sortedToOrig[k] = k;
    }
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        final int tmp = sortedToOrig[i];
        sortedToOrig[i] = sortedToOrig[j];
        sortedToOrig[j] = tmp;
      }

      @Override
      protected int compare(int i, int j) {
        // j compared to i (not i to j): heavier segments sort first
        return Long.compare(weights[sortedToOrig[j]], weights[sortedToOrig[i]]);
      }
    }.sort(0, weights.length);
    return sortedToOrig;
  }

  /** Inverse the map. */
  private static int[] inverse(int[] map) {
    final int[] inv = new int[map.length];
    for (int k = 0; k < map.length; ++k) {
      inv[map[k]] = k;
    }
    return inv;
  }

  private final int[] newToOld, oldToNew;

  SegmentMap(long[] weights) {
    newToOld = map(weights);
    oldToNew = inverse(newToOld);
    assert Arrays.equals(newToOld, inverse(oldToNew));
  }

  /** internal (sorted) segment number -> caller segment number */
  int newToOld(int segment) {
    return newToOld[segment];
  }

  /** caller segment number -> internal (sorted) segment number */
  int oldToNew(int segment) {
    return oldToNew[segment];
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
  }
}
/**
 * Create an ordinal map that uses the number of unique values of each
 * {@link SortedDocValues} instance as a weight.
 * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
 */
public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
  final int numSegments = values.length;
  final TermsEnum[] subs = new TermsEnum[numSegments];
  final long[] weights = new long[numSegments];
  for (int i = 0; i < numSegments; ++i) {
    final SortedDocValues sorted = values[i];
    subs[i] = sorted.termsEnum();
    // weight each segment by its number of unique values
    weights[i] = sorted.getValueCount();
  }
  return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
 * Create an ordinal map that uses the number of unique values of each
 * {@link SortedSetDocValues} instance as a weight.
 * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
 */
public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
  final int numSegments = values.length;
  final TermsEnum[] subs = new TermsEnum[numSegments];
  final long[] weights = new long[numSegments];
  for (int i = 0; i < numSegments; ++i) {
    final SortedSetDocValues sorted = values[i];
    subs[i] = sorted.termsEnum();
    // weight each segment by its number of unique values
    weights[i] = sorted.getValueCount();
  }
  return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
 * Creates an ordinal map that allows mapping ords to/from a merged
 * space from <code>subs</code>.
 * @param owner a cache key
 * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
 * not be dense (e.g. can be FilteredTermsEnums).
 * @param weights a weight for each sub. This is ideally correlated with
 * the number of unique terms that each sub introduces compared
 * to the other subs
 * @throws IOException if an I/O error occurred.
 */
public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
  if (subs.length != weights.length) {
    throw new IllegalArgumentException("subs and weights must have the same length");
  }
  // enums are not sorted, so let's sort to save memory
  return new OrdinalMap(owner, subs, new SegmentMap(weights), acceptableOverheadRatio);
}
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
/** Cache key of whoever asked for this awful thing */
public final IndexReader.CacheKey owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
final PackedLongValues globalOrdDeltas;
// globalOrd -> first segment container
final PackedLongValues firstSegments;
// for every segment, segmentOrd -> globalOrd
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
final SegmentMap segmentMap;
// ram usage
final long ramBytesUsed;
OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
this.segmentMap = segmentMap;
// even though we accept an overhead ratio, we keep these ones with COMPACT
// since they are only used to resolve values given a global ord, which is
// slow anyway
PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
}
// ordDeltaBits[i] OR-accumulates every delta seen for segment i, so we can later
// tell how many bits a plain packed representation of that segment's map needs
long[] ordDeltaBits = new long[subs.length];
// segmentOrds[i]: next per-segment ordinal to be mapped for segment i
long segmentOrds[] = new long[subs.length];
ReaderSlice slices[] = new ReaderSlice[subs.length];
TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
for (int i = 0; i < slices.length; i++) {
slices[i] = new ReaderSlice(0, 0, i);
// note: subs are re-indexed through segmentMap so heavier segments come first
indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
}
MultiTermsEnum mte = new MultiTermsEnum(slices);
mte.reset(indexes);
// walk the merged term enum; each distinct term receives the next global ordinal
long globalOrd = 0;
while (mte.next() != null) {
TermsEnumWithSlice matches[] = mte.getMatchArray();
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
for (int i = 0; i < mte.getMatchCount(); i++) {
int segmentIndex = matches[i].index;
long segmentOrd = matches[i].terms.ord();
long delta = globalOrd - segmentOrd;
// We compute the least segment where the term occurs. In case the
// first segment contains most (or better all) values, this will
// help save significant memory
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
// for each per-segment ord, map it back to the global term.
while (segmentOrds[segmentIndex] <= segmentOrd) {
ordDeltaBits[segmentIndex] |= delta;
ordDeltas[segmentIndex].add(delta);
segmentOrds[segmentIndex]++;
}
}
// for each unique term, just mark the first segment index/delta where it occurs
assert firstSegmentIndex < segmentOrds.length;
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
this.firstSegments = firstSegments.build();
this.globalOrdDeltas = globalOrdDeltas.build();
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
segmentToGlobalOrds = new LongValues[subs.length];
long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
+ this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
final PackedLongValues deltas = ordDeltas[i].build();
if (ordDeltaBits[i] == 0L) {
// segment ords perfectly match global ordinals
// likely in case of low cardinalities and large segments
segmentToGlobalOrds[i] = LongValues.IDENTITY;
} else {
// a negative OR-accumulated value means some delta had its sign bit set
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
final long monotonicBits = deltas.ramBytesUsed() * 8;
final long packedBits = bitsRequired * deltas.size();
if (deltas.size() <= Integer.MAX_VALUE
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
final PackedLongValues.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
assert !it.hasNext();
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + newDeltas.get((int) ord);
}
};
ramBytesUsed += newDeltas.ramBytesUsed();
} else {
// keep the monotonic representation
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + deltas.get(ord);
}
};
ramBytesUsed += deltas.ramBytesUsed();
}
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
}
}
this.ramBytesUsed = ramBytesUsed;
}
/**
 * Given a segment number, return a {@link LongValues} instance that maps
 * segment ordinals to global ordinals.
 */
public LongValues getGlobalOrds(int segmentIndex) {
  // translate from the caller's segment numbering to the weight-sorted internal one
  final int internalIndex = segmentMap.oldToNew(segmentIndex);
  return segmentToGlobalOrds[internalIndex];
}

/**
 * Given a global ordinal, return the corresponding ordinal inside the first
 * segment that contains it (the segment returned by {@link #getFirstSegmentNumber}).
 */
public long getFirstSegmentOrd(long globalOrd) {
  // globalOrdDeltas stores (globalOrd - segmentOrd) for that first segment
  final long delta = globalOrdDeltas.get(globalOrd);
  return globalOrd - delta;
}

/**
 * Given a global ordinal, returns the index of the first
 * segment that contains this term.
 */
public int getFirstSegmentNumber(long globalOrd) {
  // firstSegments holds internal (sorted) segment numbers; map back to caller numbering
  final int internalIndex = (int) firstSegments.get(globalOrd);
  return segmentMap.newToOld(internalIndex);
}
/**
 * Returns the total number of unique terms in global ord space.
 */
public long getValueCount() {
  // one delta entry was appended per unique term while building the map
  return globalOrdDeltas.size();
}

@Override
public long ramBytesUsed() {
  // precomputed once in the constructor
  return ramBytesUsed;
}

@Override
public Collection<Accountable> getChildResources() {
  final List<Accountable> resources = new ArrayList<>(3);
  resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas));
  resources.add(Accountables.namedAccountable("first segments", firstSegments));
  resources.add(Accountables.namedAccountable("segment map", segmentMap));
  // TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing
  return resources;
}
}
/**
 * Implements SortedDocValues over n subs, using an OrdinalMap
 * @lucene.internal
 */
public static class MultiSortedDocValues extends SortedDocValues {
/** docbase for each leaf: parallel with {@link #values} */
public final int docStarts[];
/** leaf values */
public final SortedDocValues values[];
/** ordinal map mapping ords from <code>values</code> to global ord space */
public final OrdinalMap mapping;
// sum of the per-leaf costs, supplied by the caller
private final long totalCost;
// cursor state: index of the next leaf to open, values/docBase of the leaf the
// cursor is positioned on, and the current docID in the composite space
private int nextLeaf;
private SortedDocValues currentValues;
private int currentDocStart;
private int docID = -1;
/** Creates a new MultiSortedDocValues over <code>values</code> */
public MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping, long totalCost) throws IOException {
assert docStarts.length == values.length + 1;
this.values = values;
this.docStarts = docStarts;
this.mapping = mapping;
this.totalCost = totalCost;
}
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() throws IOException {
while (true) {
// open the next leaf when the current one is exhausted (or at the start)
while (currentValues == null) {
if (nextLeaf == values.length) {
docID = NO_MORE_DOCS;
return docID;
}
currentDocStart = docStarts[nextLeaf];
currentValues = values[nextLeaf];
nextLeaf++;
}
int newDocID = currentValues.nextDoc();
if (newDocID == NO_MORE_DOCS) {
currentValues = null;
continue;
} else {
// translate the per-leaf docID into the composite docID space
docID = currentDocStart + newDocID;
return docID;
}
}
}
@Override
public int advance(int targetDocID) throws IOException {
if (targetDocID <= docID) {
throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
}
int readerIndex = ReaderUtil.subIndex(targetDocID, docStarts);
if (readerIndex >= nextLeaf) {
// the target lives in a later leaf: jump the cursor there
if (readerIndex == values.length) {
currentValues = null;
docID = NO_MORE_DOCS;
return docID;
}
currentDocStart = docStarts[readerIndex];
currentValues = values[readerIndex];
nextLeaf = readerIndex+1;
}
int newDocID = currentValues.advance(targetDocID - currentDocStart);
if (newDocID == NO_MORE_DOCS) {
// current leaf exhausted: continue in the following leaves
currentValues = null;
return nextDoc();
} else {
docID = currentDocStart + newDocID;
return docID;
}
}
@Override
public boolean advanceExact(int targetDocID) throws IOException {
if (targetDocID < docID) {
throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
}
int readerIndex = ReaderUtil.subIndex(targetDocID, docStarts);
if (readerIndex >= nextLeaf) {
if (readerIndex == values.length) {
throw new IllegalArgumentException("Out of range: " + targetDocID);
}
currentDocStart = docStarts[readerIndex];
currentValues = values[readerIndex];
nextLeaf = readerIndex+1;
}
docID = targetDocID;
if (currentValues == null) {
return false;
}
return currentValues.advanceExact(targetDocID - currentDocStart);
}
@Override
public int ordValue() throws IOException {
// nextLeaf-1 is the leaf the cursor is positioned on; map its segment-local
// ord into the global ord space
return (int) mapping.getGlobalOrds(nextLeaf-1).get(currentValues.ordValue());
}
@Override
public BytesRef lookupOrd(int ord) throws IOException {
// resolve a global ord by delegating to the first segment that contains it
int subIndex = mapping.getFirstSegmentNumber(ord);
int segmentOrd = (int) mapping.getFirstSegmentOrd(ord);
return values[subIndex].lookupOrd(segmentOrd);
}
@Override
public int getValueCount() {
return (int) mapping.getValueCount();
}
@Override
public long cost() {
return totalCost;
}
}
/**
 * Implements SortedSetDocValues over n subs, using an OrdinalMap
 * @lucene.internal
 */
public static class MultiSortedSetDocValues extends SortedSetDocValues {
/** docbase for each leaf: parallel with {@link #values} */
public final int docStarts[];
/** leaf values */
public final SortedSetDocValues values[];
/** ordinal map mapping ords from <code>values</code> to global ord space */
public final OrdinalMap mapping;
// sum of the per-leaf costs, supplied by the caller
private final long totalCost;
// cursor state: index of the next leaf to open, values/docBase of the leaf the
// cursor is positioned on, and the current docID in the composite space
private int nextLeaf;
private SortedSetDocValues currentValues;
private int currentDocStart;
private int docID = -1;
/** Creates a new MultiSortedSetDocValues over <code>values</code> */
public MultiSortedSetDocValues(SortedSetDocValues values[], int docStarts[], OrdinalMap mapping, long totalCost) throws IOException {
assert docStarts.length == values.length + 1;
this.values = values;
this.docStarts = docStarts;
this.mapping = mapping;
this.totalCost = totalCost;
}
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() throws IOException {
while (true) {
// open the next leaf when the current one is exhausted (or at the start)
while (currentValues == null) {
if (nextLeaf == values.length) {
docID = NO_MORE_DOCS;
return docID;
}
currentDocStart = docStarts[nextLeaf];
currentValues = values[nextLeaf];
nextLeaf++;
}
int newDocID = currentValues.nextDoc();
if (newDocID == NO_MORE_DOCS) {
currentValues = null;
continue;
} else {
// translate the per-leaf docID into the composite docID space
docID = currentDocStart + newDocID;
return docID;
}
}
}
@Override
public int advance(int targetDocID) throws IOException {
if (targetDocID <= docID) {
throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
}
int readerIndex = ReaderUtil.subIndex(targetDocID, docStarts);
if (readerIndex >= nextLeaf) {
// the target lives in a later leaf: jump the cursor there
if (readerIndex == values.length) {
currentValues = null;
docID = NO_MORE_DOCS;
return docID;
}
currentDocStart = docStarts[readerIndex];
currentValues = values[readerIndex];
nextLeaf = readerIndex+1;
}
int newDocID = currentValues.advance(targetDocID - currentDocStart);
if (newDocID == NO_MORE_DOCS) {
// current leaf exhausted: continue in the following leaves
currentValues = null;
return nextDoc();
} else {
docID = currentDocStart + newDocID;
return docID;
}
}
@Override
public boolean advanceExact(int targetDocID) throws IOException {
if (targetDocID < docID) {
throw new IllegalArgumentException("can only advance beyond current document: on docID=" + docID + " but targetDocID=" + targetDocID);
}
int readerIndex = ReaderUtil.subIndex(targetDocID, docStarts);
if (readerIndex >= nextLeaf) {
if (readerIndex == values.length) {
throw new IllegalArgumentException("Out of range: " + targetDocID);
}
currentDocStart = docStarts[readerIndex];
currentValues = values[readerIndex];
nextLeaf = readerIndex+1;
}
docID = targetDocID;
if (currentValues == null) {
return false;
}
return currentValues.advanceExact(targetDocID - currentDocStart);
}
@Override
public long nextOrd() throws IOException {
// nextLeaf-1 is the leaf the cursor is positioned on; pass NO_MORE_ORDS
// through unchanged, otherwise map the segment-local ord to the global one
long segmentOrd = currentValues.nextOrd();
if (segmentOrd == NO_MORE_ORDS) {
return segmentOrd;
} else {
return mapping.getGlobalOrds(nextLeaf-1).get(segmentOrd);
}
}
@Override
public BytesRef lookupOrd(long ord) throws IOException {
// resolve a global ord by delegating to the first segment that contains it
int subIndex = mapping.getFirstSegmentNumber(ord);
long segmentOrd = mapping.getFirstSegmentOrd(ord);
return values[subIndex].lookupOrd(segmentOrd);
}
@Override
public long getValueCount() {
return mapping.getValueCount();
}
@Override
public long cost() {
return totalCost;
}
}
}