UninvertingReader.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.function.Function;

import org.apache.lucene.document.BinaryDocValuesField; // javadocs
import org.apache.lucene.document.NumericDocValuesField; // javadocs
import org.apache.lucene.document.SortedDocValuesField; // javadocs
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.document.StringField; // javadocs
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.solr.uninverting.FieldCache.CacheEntry;

/**
 * A FilterReader that exposes <i>indexed</i> values as if they also had
 * docvalues.
 * <p>
 * This is accomplished by "inverting the inverted index" or "uninversion".
 * <p>
 * The uninversion process happens lazily: upon the first request for the 
 * field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} 
 * or similar), it will create the docvalues on-the-fly if needed and cache it,
 * based on the core cache key of the wrapped LeafReader.
 */
public class UninvertingReader extends FilterLeafReader {
  
  /**
   * Specifies the type of uninversion to apply for the field.
   * <p>
   * The {@code *_POINT} types are uninverted from the field's points; every
   * other type is uninverted from the field's inverted index.
   */
  public static enum Type {
    /** 
     * Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     */
    INTEGER_POINT,
    /** 
     * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     */
    LONG_POINT,
    /** 
     * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     */
    FLOAT_POINT,
    /** 
     * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     */
    DOUBLE_POINT,
    /** 
     * Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyIntField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     * @deprecated Index with points and use {@link #INTEGER_POINT} instead.
     */
    @Deprecated
    LEGACY_INTEGER,
    /** 
     * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyLongField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     * @deprecated Index with points and use {@link #LONG_POINT} instead.
     */
    @Deprecated
    LEGACY_LONG,
    /** 
     * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyFloatField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     * @deprecated Index with points and use {@link #FLOAT_POINT} instead.
     */
    @Deprecated
    LEGACY_FLOAT,
    /** 
     * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyDoubleField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     * @deprecated Index with points and use {@link #DOUBLE_POINT} instead.
     */
    @Deprecated
    LEGACY_DOUBLE,
    /** 
     * Single-valued Binary, (e.g. indexed with {@link StringField}) 
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link BinaryDocValuesField}.
     */
    BINARY,
    /** 
     * Single-valued Binary, (e.g. indexed with {@link StringField}) 
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedDocValuesField}.
     */
    SORTED,
    /** 
     * Multi-valued Binary, (e.g. indexed with {@link StringField}) 
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_BINARY,
    /** 
     * Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyIntField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_INTEGER,
    /** 
     * Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyFloatField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_FLOAT,
    /** 
     * Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyLongField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_LONG,
    /** 
     * Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyDoubleField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_DOUBLE

  }
  
  /**
   * Wraps a provided DirectoryReader so that each of its segment readers is
   * exposed through an {@link UninvertingReader}. Note that for convenience, the
   * returned reader can be used normally (e.g. passed to
   * {@link DirectoryReader#openIfChanged(DirectoryReader)}) and so on.
   *
   * @param in input directory reader
   * @param perSegmentMapper function to map a segment reader to a mapping of fields to their uninversion type
   * @return a wrapped directory reader
   * @throws IOException if constructing the wrapping reader fails
   */
  public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException {
    final DirectoryReader uninverting = new UninvertingDirectoryReader(in, perSegmentMapper);
    return uninverting;
  }
  
  /**
   * Wraps a provided DirectoryReader, applying one and the same field mapping
   * to every segment reader.
   *
   * @param in input directory reader
   * @param mapping mapping of fields to their uninversion type, used for all segments
   * @return a wrapped directory reader
   * @throws IOException if constructing the wrapping reader fails
   */
  public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
    // The segment reader is ignored: every segment gets the same mapping instance.
    final Function<LeafReader, Map<String,Type>> sameForEverySegment = leaf -> mapping;
    return wrap(in, sameForEverySegment);
  }
  
  /**
   * A {@link FilterDirectoryReader} whose sub-readers are each wrapped in an
   * {@link UninvertingReader}, using the mapping that {@link #mapper} yields
   * for that sub-reader.
   */
  static class UninvertingDirectoryReader extends FilterDirectoryReader {
    /** Supplies, per segment reader, the mapping of fields to their uninversion type. */
    final Function<LeafReader, Map<String,Type>> mapper;

    /**
     * @param in directory reader to wrap
     * @param mapper function to map a segment reader to a mapping of fields to their uninversion type
     */
    public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException {
      super(in, uninvertEachLeaf(mapper));
      this.mapper = mapper;
    }

    /** Creates the sub-reader wrapper that turns every leaf into an UninvertingReader. */
    private static FilterDirectoryReader.SubReaderWrapper uninvertEachLeaf(final Function<LeafReader, Map<String,Type>> typesForLeaf) {
      return new FilterDirectoryReader.SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader leaf) {
          final Map<String,Type> leafMapping = typesForLeaf.apply(leaf);
          return new UninvertingReader(leaf, leafMapping);
        }
      };
    }

    /** Re-wraps a directory reader with the same per-segment mapper. */
    @Override
    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
      return new UninvertingDirectoryReader(in, this.mapper);
    }

    // NOTE: delegating the cache helpers is wrong since this wrapper alters the
    // content of the reader, it is only fine to do that because Solr ALWAYS
    // consumes index readers through this wrapper

    @Override
    public CacheHelper getReaderCacheHelper() {
      final CacheHelper wrappedHelper = in.getReaderCacheHelper();
      return wrappedHelper;
    }
  }
  
  /** Field name to requested uninversion type, as handed to the constructor. */
  final Map<String,Type> mapping;
  /** Field infos exposed by this reader, with docvalues types adjusted for uninverted fields. */
  final FieldInfos fieldInfos;

  /**
   * Create a new UninvertingReader with the specified mapping
   * <p>
   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)}
   * instead.
   * <p>
   * The exposed {@link FieldInfos} are derived from the wrapped reader's: a field
   * that has no docvalues but does have a mapping is reported with the docvalues
   * type matching its mapping. A mapped field that lacks the data its mapping
   * reads from (points for the {@code *_POINT} types, the inverted index for all
   * others) is left out of the exposed field infos altogether. All other fields
   * are copied with their docvalues type unchanged.
   *
   * @param in leaf reader to wrap
   * @param mapping mapping of fields to their uninversion type
   * @lucene.internal
   */
  public UninvertingReader(LeafReader in, Map<String,Type> mapping) {
    super(in);
    this.mapping = mapping;
    final ArrayList<FieldInfo> exposed = new ArrayList<>();
    for (FieldInfo info : in.getFieldInfos()) {
      DocValuesType dvType = info.getDocValuesType();
      // Only fields without real docvalues are candidates for uninversion.
      final Type requested = (dvType == DocValuesType.NONE) ? mapping.get(info.name) : null;
      if (requested != null) {
        final boolean hasSourceData = readsFromPoints(requested)
            ? info.getPointDimensionCount() != 0
            : info.getIndexOptions() != IndexOptions.NONE;
        if (!hasSourceData) {
          // nothing to uninvert from: the field is not exposed at all
          continue;
        }
        dvType = emulatedDocValuesType(requested);
      }
      final FieldInfo copy = new FieldInfo(info.name, info.number, info.hasVectors(), info.omitsNorms(),
          info.hasPayloads(), info.getIndexOptions(), dvType, info.getDocValuesGen(), info.attributes(),
          info.getPointDimensionCount(), info.getPointNumBytes());
      exposed.add(copy);
    }
    final FieldInfo[] asArray = exposed.toArray(new FieldInfo[exposed.size()]);
    fieldInfos = new FieldInfos(asArray);
  }

  /** Returns true if the uninversion type reads from points rather than the inverted index. */
  private static boolean readsFromPoints(Type requested) {
    switch (requested) {
      case INTEGER_POINT:
      case LONG_POINT:
      case FLOAT_POINT:
      case DOUBLE_POINT:
        return true;
      default:
        return false;
    }
  }

  /** Maps an uninversion type to the docvalues type the field is reported with. */
  private static DocValuesType emulatedDocValuesType(Type requested) {
    switch (requested) {
      case INTEGER_POINT:
      case LONG_POINT:
      case FLOAT_POINT:
      case DOUBLE_POINT:
      case LEGACY_INTEGER:
      case LEGACY_LONG:
      case LEGACY_FLOAT:
      case LEGACY_DOUBLE:
        return DocValuesType.NUMERIC;
      case BINARY:
        return DocValuesType.BINARY;
      case SORTED:
        return DocValuesType.SORTED;
      case SORTED_SET_BINARY:
      case SORTED_SET_INTEGER:
      case SORTED_SET_FLOAT:
      case SORTED_SET_LONG:
      case SORTED_SET_DOUBLE:
        return DocValuesType.SORTED_SET;
      default:
        throw new AssertionError();
    }
  }

  /**
   * Returns the field infos computed in the constructor rather than those of
   * the wrapped reader.
   */
  @Override
  public FieldInfos getFieldInfos() {
    return this.fieldInfos;
  }

  /**
   * Returns the field's numeric docvalues if it has any; otherwise, when the
   * field is mapped to one of the single-valued numeric types, returns values
   * obtained from {@link FieldCache#DEFAULT} with the parser matching that type.
   *
   * @param field field name
   * @return the numeric values, or null if the field has neither numeric
   *         docvalues nor a numeric uninversion type
   */
  @Override
  public NumericDocValues getNumericDocValues(String field) throws IOException {
    final NumericDocValues existing = super.getNumericDocValues(field);
    if (existing != null) {
      return existing;
    }
    final Type type = getType(field);
    if (type == null) {
      return null;
    }
    switch (type) {
      case INTEGER_POINT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER);
      case FLOAT_POINT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER);
      case LONG_POINT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER);
      case DOUBLE_POINT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER);
      case LEGACY_INTEGER:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER);
      case LEGACY_FLOAT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER);
      case LEGACY_LONG:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER);
      case LEGACY_DOUBLE:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER);
      default:
        // mapped to a non-numeric type
        return null;
    }
  }

  /**
   * Returns the wrapped reader's binary docvalues for the field if present;
   * otherwise, when the field is mapped to {@link Type#BINARY}, returns the
   * terms obtained from {@link FieldCache#DEFAULT}.
   *
   * @param field field name
   * @return the binary values, or null if neither source applies
   */
  @Override
  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    final BinaryDocValues existing = in.getBinaryDocValues(field);
    if (existing != null) {
      return existing;
    }
    return getType(field) == Type.BINARY ? FieldCache.DEFAULT.getTerms(in, field) : null;
  }

  /**
   * Returns the wrapped reader's sorted docvalues for the field if present;
   * otherwise, when the field is mapped to {@link Type#SORTED}, returns the
   * terms index obtained from {@link FieldCache#DEFAULT}.
   *
   * @param field field name
   * @return the sorted values, or null if neither source applies
   */
  @Override
  public SortedDocValues getSortedDocValues(String field) throws IOException {
    final SortedDocValues existing = in.getSortedDocValues(field);
    if (existing != null) {
      return existing;
    }
    if (getType(field) != Type.SORTED) {
      return null;
    }
    return FieldCache.DEFAULT.getTermsIndex(in, field);
  }
  
  /**
   * Returns the wrapped reader's sorted-set docvalues for the field if present;
   * otherwise, when the field is mapped to one of the {@code SORTED_SET_*}
   * types, returns the doc term ords obtained from {@link FieldCache#DEFAULT}.
   * Integer and float mappings pass {@code FieldCache.INT32_TERM_PREFIX}, long
   * and double mappings pass {@code FieldCache.INT64_TERM_PREFIX}, and binary
   * mappings pass no prefix.
   *
   * @param field field name
   * @return the sorted-set values, or null if neither source applies
   */
  @Override
  public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    final SortedSetDocValues existing = in.getSortedSetDocValues(field);
    if (existing != null) {
      return existing;
    }
    final Type type = getType(field);
    if (type == null) {
      return null;
    }
    switch (type) {
      case SORTED_SET_INTEGER:
      case SORTED_SET_FLOAT:
        return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
      case SORTED_SET_LONG:
      case SORTED_SET_DOUBLE:
        return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
      case SORTED_SET_BINARY:
        return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
      default:
        // mapped to a single-valued type
        return null;
    }
  }

  /**
   * Looks up the uninversion type mapped to a field.
   *
   * @param field field name
   * @return the mapped type, or null if this reader's field infos do not
   *         contain the field, report no docvalues type for it, or the
   *         mapping has no entry for it
   */
  private Type getType(String field) {
    final FieldInfo info = fieldInfos.fieldInfo(field);
    final boolean exposesDocValues = info != null && info.getDocValuesType() != DocValuesType.NONE;
    return exposesDocValues ? mapping.get(field) : null;
  }

  // NOTE: delegating the cache helpers is wrong since this wrapper alters the
  // content of the reader, it is only fine to do that because Solr ALWAYS
  // consumes index readers through this wrapper

  /** Returns the core cache helper of the wrapped reader. */
  @Override
  public CacheHelper getCoreCacheHelper() {
    final CacheHelper wrappedCoreHelper = in.getCoreCacheHelper();
    return wrappedCoreHelper;
  }

  /** Returns the reader cache helper of the wrapped reader. */
  @Override
  public CacheHelper getReaderCacheHelper() {
    final CacheHelper wrappedReaderHelper = in.getReaderCacheHelper();
    return wrappedReaderHelper;
  }

  /** Returns the wrapped reader's string form enclosed in {@code Uninverting(...)}. */
  @Override
  public String toString() {
    final StringBuilder description = new StringBuilder("Uninverting(");
    description.append(in.toString());
    description.append(")");
    return description.toString();
  }
  
  /**
   * Return information about the backing cache: the string form of every entry
   * currently held by {@link FieldCache#DEFAULT}, plus the sum of the entries'
   * reported RAM usage rendered in human-readable units.
   *
   * @return a snapshot of the cache entries and their total size
   * @lucene.internal
   */
  public static FieldCacheStats getUninvertedStats() {
    final CacheEntry[] cached = FieldCache.DEFAULT.getCacheEntries();
    final String[] descriptions = new String[cached.length];
    long ramBytes = 0;
    int slot = 0;
    for (CacheEntry entry : cached) {
      descriptions[slot++] = entry.toString();
      ramBytes += entry.getValue().ramBytesUsed();
    }
    return new FieldCacheStats(RamUsageEstimator.humanReadableUnits(ramBytes), descriptions);
  }

  /**
   * Returns the number of entries currently held by {@link FieldCache#DEFAULT}.
   *
   * @return the cache entry count
   */
  public static int getUninvertedStatsSize() {
    final CacheEntry[] cached = FieldCache.DEFAULT.getCacheEntries();
    return cached.length;
  }

  /**
   * Holder for information about the backing cache, as produced by
   * {@link #getUninvertedStats()}.
   * @lucene.internal
   */
  public static class FieldCacheStats {
    /** Total RAM usage of the reported cache entries, in human-readable units. */
    public String totalSize;
    /** String form of each reported cache entry. */
    public String[] info;

    /**
     * @param totalSize total size of the cache entries, already formatted for display
     * @param info one description per cache entry; the array is stored as given, not copied
     */
    public FieldCacheStats(String totalSize, String[] info) {
      this.totalSize = totalSize;
      this.info = info;
    }

  }
}