FieldCache.java example

Explorer
pylucene-master
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs

import java.io.IOException;
import java.io.Serializable;
import java.io.PrintStream;

import java.text.DecimalFormat;

/**
 * Expert: Maintains caches of term values.
 *
 * <p>Created: May 19, 2004 11:13:14 AM
 *
 * @since   lucene 1.4
 * @see org.apache.lucene.util.FieldCacheSanityChecker
 */
public interface FieldCache {

  public static final class CreationPlaceholder {
    Object value;
  }

  /** Indicator for StringIndex values in the cache. */
  // NOTE: the value assigned to this constant must not be
  // the same as any of those in SortField!!
  public static final int STRING_INDEX = -1;


  /** Expert: Stores term text values and document ordering data. */
  public static class StringIndex {
	  
    public int binarySearchLookup(String key) {
      // this special case is the reason that Arrays.binarySearch() isn't useful.
      if (key == null)
        return 0;
	  
      int low = 1;
      int high = lookup.length-1;

      while (low <= high) {
        int mid = (low + high) >>> 1;
        int cmp = lookup[mid].compareTo(key);

        if (cmp < 0)
          low = mid + 1;
        else if (cmp > 0)
          high = mid - 1;
        else
          return mid; // key found
      }
      return -(low + 1);  // key not found.
    }
	
    /** All the term values, in natural order. */
    public final String[] lookup;

    /** For each document, an index into the lookup array. */
    public final int[] order;

    /** Creates one of these objects */
    public StringIndex (int[] values, String[] lookup) {
      this.order = values;
      this.lookup = lookup;
    }
  }

  /**
   * Marker interface as super-interface to all parsers. It
   * is used to specify a custom parser to {@link
   * SortField#SortField(String, FieldCache.Parser)}.
   */
  public interface Parser extends Serializable {
  }

  /** Interface to parse bytes from document fields.
   * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser)
   */
  public interface ByteParser extends Parser {
    /** Return a single Byte representation of this field's value. */
    public byte parseByte(String string);
  }

  /** Interface to parse shorts from document fields.
   * @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser)
   */
  public interface ShortParser extends Parser {
    /** Return a short representation of this field's value. */
    public short parseShort(String string);
  }

  /** Interface to parse ints from document fields.
   * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser)
   */
  public interface IntParser extends Parser {
    /** Return an integer representation of this field's value. */
    public int parseInt(String string);
  }

  /** Interface to parse floats from document fields.
   * @see FieldCache#getFloats(IndexReader, String, FieldCache.FloatParser)
   */
  public interface FloatParser extends Parser {
    /** Return an float representation of this field's value. */
    public float parseFloat(String string);
  }

  /** Interface to parse long from document fields.
   * @see FieldCache#getLongs(IndexReader, String, FieldCache.LongParser)
   */
  public interface LongParser extends Parser {
    /** Return an long representation of this field's value. */
    public long parseLong(String string);
  }

  /** Interface to parse doubles from document fields.
   * @see FieldCache#getDoubles(IndexReader, String, FieldCache.DoubleParser)
   */
  public interface DoubleParser extends Parser {
    /** Return an long representation of this field's value. */
    public double parseDouble(String string);
  }

  /** Expert: The cache used internally by sorting and range query classes. */
  public static FieldCache DEFAULT = new FieldCacheImpl();
  
  /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */
  public static final ByteParser DEFAULT_BYTE_PARSER = new ByteParser() {
    public byte parseByte(String value) {
      return Byte.parseByte(value);
    }
    protected Object readResolve() {
      return DEFAULT_BYTE_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER"; 
    }
  };

  /** The default parser for short values, which are encoded by {@link Short#toString(short)} */
  public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() {
    public short parseShort(String value) {
      return Short.parseShort(value);
    }
    protected Object readResolve() {
      return DEFAULT_SHORT_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER"; 
    }
  };

  /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */
  public static final IntParser DEFAULT_INT_PARSER = new IntParser() {
    public int parseInt(String value) {
      return Integer.parseInt(value);
    }
    protected Object readResolve() {
      return DEFAULT_INT_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".DEFAULT_INT_PARSER"; 
    }
  };

  /** The default parser for float values, which are encoded by {@link Float#toString(float)} */
  public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() {
    public float parseFloat(String value) {
      return Float.parseFloat(value);
    }
    protected Object readResolve() {
      return DEFAULT_FLOAT_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER"; 
    }
  };

  /** The default parser for long values, which are encoded by {@link Long#toString(long)} */
  public static final LongParser DEFAULT_LONG_PARSER = new LongParser() {
    public long parseLong(String value) {
      return Long.parseLong(value);
    }
    protected Object readResolve() {
      return DEFAULT_LONG_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".DEFAULT_LONG_PARSER"; 
    }
  };

  /** The default parser for double values, which are encoded by {@link Double#toString(double)} */
  public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() {
    public double parseDouble(String value) {
      return Double.parseDouble(value);
    }
    protected Object readResolve() {
      return DEFAULT_DOUBLE_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER"; 
    }
  };

  /**
   * A parser instance for int values encoded by {@link NumericUtils#intToPrefixCoded(int)}, e.g. when indexed
   * via {@link NumericField}/{@link NumericTokenStream}.
   */
  public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
    public int parseInt(String val) {
      final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
      if (shift>0 && shift<=31)
        throw new FieldCacheImpl.StopFillCacheException();
      return NumericUtils.prefixCodedToInt(val);
    }
    protected Object readResolve() {
      return NUMERIC_UTILS_INT_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER"; 
    }
  };

  /**
   * A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed
   * via {@link NumericField}/{@link NumericTokenStream}.
   */
  public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
    public float parseFloat(String val) {
      final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
      if (shift>0 && shift<=31)
        throw new FieldCacheImpl.StopFillCacheException();
      return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(val));
    }
    protected Object readResolve() {
      return NUMERIC_UTILS_FLOAT_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER"; 
    }
  };

  /**
   * A parser instance for long values encoded by {@link NumericUtils#longToPrefixCoded(long)}, e.g. when indexed
   * via {@link NumericField}/{@link NumericTokenStream}.
   */
  public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
    public long parseLong(String val) {
      final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
      if (shift>0 && shift<=63)
        throw new FieldCacheImpl.StopFillCacheException();
      return NumericUtils.prefixCodedToLong(val);
    }
    protected Object readResolve() {
      return NUMERIC_UTILS_LONG_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER"; 
    }
  };

  /**
   * A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed
   * via {@link NumericField}/{@link NumericTokenStream}.
   */
  public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
    public double parseDouble(String val) {
      final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
      if (shift>0 && shift<=63)
        throw new FieldCacheImpl.StopFillCacheException();
      return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(val));
    }
    protected Object readResolve() {
      return NUMERIC_UTILS_DOUBLE_PARSER;
    }
    @Override
    public String toString() { 
      return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER"; 
    }
  };

  /** Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> and returns a bit set at the size of
   * <code>reader.maxDoc()</code>, with turned on bits for each docid that 
   * does have a value for this field.
   */
  public Bits getDocsWithField(IndexReader reader, String field) 
  throws IOException;
  
  /** Checks the internal cache for an appropriate entry, and if none is
   * found, reads the terms in <code>field</code> as a single byte and returns an array
   * of size <code>reader.maxDoc()</code> of the value each document
   * has in the given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the single byte values.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public byte[] getBytes (IndexReader reader, String field)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as bytes and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the bytes.
   * @param parser  Computes byte for string values.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as bytes and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the bytes.
   * @param parser  Computes byte for string values.
   * @param setDocsWithField  If true then {@link #getDocsWithField} will
   *        also be computed and stored in the FieldCache.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public byte[] getBytes (IndexReader reader, String field, ByteParser parser, boolean setDocsWithField)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none is
   * found, reads the terms in <code>field</code> as shorts and returns an array
   * of size <code>reader.maxDoc()</code> of the value each document
   * has in the given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the shorts.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public short[] getShorts (IndexReader reader, String field)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as shorts and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the shorts.
   * @param parser  Computes short for string values.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public short[] getShorts (IndexReader reader, String field, ShortParser parser)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as shorts and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the shorts.
   * @param parser  Computes short for string values.
   * @param setDocsWithField  If true then {@link #getDocsWithField} will
   *        also be computed and stored in the FieldCache.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public short[] getShorts (IndexReader reader, String field, ShortParser parser, boolean setDocsWithField)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none is
   * found, reads the terms in <code>field</code> as integers and returns an array
   * of size <code>reader.maxDoc()</code> of the value each document
   * has in the given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the integers.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public int[] getInts (IndexReader reader, String field)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as integers and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the integers.
   * @param parser  Computes integer for string values.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public int[] getInts (IndexReader reader, String field, IntParser parser)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as integers and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the integers.
   * @param parser  Computes integer for string values.
   * @param setDocsWithField  If true then {@link #getDocsWithField} will
   *        also be computed and stored in the FieldCache.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public int[] getInts (IndexReader reader, String field, IntParser parser, boolean setDocsWithField)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if
   * none is found, reads the terms in <code>field</code> as floats and returns an array
   * of size <code>reader.maxDoc()</code> of the value each document
   * has in the given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the floats.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public float[] getFloats (IndexReader reader, String field)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if
   * none is found, reads the terms in <code>field</code> as floats and returns an array
   * of size <code>reader.maxDoc()</code> of the value each document
   * has in the given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the floats.
   * @param parser  Computes float for string values.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public float[] getFloats (IndexReader reader, String field,
                            FloatParser parser) throws IOException;
  
  /** Checks the internal cache for an appropriate entry, and if
   * none is found, reads the terms in <code>field</code> as floats and returns an array
   * of size <code>reader.maxDoc()</code> of the value each document
   * has in the given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the floats.
   * @param parser  Computes float for string values.
   * @param setDocsWithField  If true then {@link #getDocsWithField} will
   *        also be computed and stored in the FieldCache.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public float[] getFloats (IndexReader reader, String field,
                            FloatParser parser, boolean setDocsWithField) throws IOException;
  
  /**
   * Checks the internal cache for an appropriate entry, and if none is
   * found, reads the terms in <code>field</code> as longs and returns an array
   * of size <code>reader.maxDoc()</code> of the value each document
   * has in the given field.
   *
   * @param reader Used to get field values.
   * @param field  Which field contains the longs.
   * @return The values in the given field for each document.
   * @throws java.io.IOException If any error occurs.
   */
  public long[] getLongs(IndexReader reader, String field)
          throws IOException;

  /**
   * Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as longs and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   *
   * @param reader Used to get field values.
   * @param field  Which field contains the longs.
   * @param parser Computes integer for string values.
   * @return The values in the given field for each document.
   * @throws IOException If any error occurs.
   */
  public long[] getLongs(IndexReader reader, String field, LongParser parser)
          throws IOException;
  /**
   * Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as longs and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   *
   * @param reader Used to get field values.
   * @param field  Which field contains the longs.
   * @param parser Computes integer for string values.
   * @param setDocsWithField  If true then {@link #getDocsWithField} will
   *        also be computed and stored in the FieldCache.
   * @return The values in the given field for each document.
   * @throws IOException If any error occurs.
   */
  public long[] getLongs(IndexReader reader, String field, LongParser parser, boolean setDocsWithField)
          throws IOException;

  /**
   * Checks the internal cache for an appropriate entry, and if none is
   * found, reads the terms in <code>field</code> as integers and returns an array
   * of size <code>reader.maxDoc()</code> of the value each document
   * has in the given field.
   *
   * @param reader Used to get field values.
   * @param field  Which field contains the doubles.
   * @return The values in the given field for each document.
   * @throws IOException If any error occurs.
   */
  public double[] getDoubles(IndexReader reader, String field)
          throws IOException;

  /**
   * Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as doubles and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   *
   * @param reader Used to get field values.
   * @param field  Which field contains the doubles.
   * @param parser Computes integer for string values.
   * @return The values in the given field for each document.
   * @throws IOException If any error occurs.
   */
  public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
          throws IOException;

  /**
   * Checks the internal cache for an appropriate entry, and if none is found,
   * reads the terms in <code>field</code> as doubles and returns an array of
   * size <code>reader.maxDoc()</code> of the value each document has in the
   * given field.
   *
   * @param reader Used to get field values.
   * @param field  Which field contains the doubles.
   * @param parser Computes integer for string values.
   * @param setDocsWithField  If true then {@link #getDocsWithField} will
   *        also be computed and stored in the FieldCache.
   * @return The values in the given field for each document.
   * @throws IOException If any error occurs.
   */
  public double[] getDoubles(IndexReader reader, String field, DoubleParser parser, boolean setDocsWithField)
          throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none
   * is found, reads the term values in <code>field</code> and returns an array
   * of size <code>reader.maxDoc()</code> containing the value each document
   * has in the given field.
   * @param reader  Used to get field values.
   * @param field   Which field contains the strings.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  public String[] getStrings (IndexReader reader, String field)
  throws IOException;

  /** Checks the internal cache for an appropriate entry, and if none
   * is found reads the term values in <code>field</code> and returns
   * an array of them in natural order, along with an array telling
   * which element in the term array each document uses.
   * @param reader  Used to get field values.
   * @param field   Which field contains the strings.
   * @return Array of terms and index into the array for each document.
   * @throws IOException  If any error occurs.
   */
  public StringIndex getStringIndex (IndexReader reader, String field)
  throws IOException;

  /**
   * EXPERT: A unique Identifier/Description for each item in the FieldCache. 
   * Can be useful for logging/debugging.
   * @lucene.experimental
   */
  public static abstract class CacheEntry {
    public abstract Object getReaderKey();
    public abstract String getFieldName();
    public abstract Class<?> getCacheType();
    public abstract Object getCustom();
    public abstract Object getValue();
    private String size = null;
    protected final void setEstimatedSize(String size) {
      this.size = size;
    }
    /** 
     * @see #estimateSize(RamUsageEstimator)
     */
    public void estimateSize() {
      estimateSize(new RamUsageEstimator(false)); // doesn't check for interned
    }
    /** 
     * Computes (and stores) the estimated size of the cache Value 
     * @see #getEstimatedSize
     */
    public void estimateSize(RamUsageEstimator ramCalc) {
      long size = ramCalc.estimateRamUsage(getValue());
      setEstimatedSize(RamUsageEstimator.humanReadableUnits
                       (size, new DecimalFormat("0.#")));
                        
    }
    /**
     * The most recently estimated size of the value, null unless 
     * estimateSize has been called.
     */
    public final String getEstimatedSize() {
      return size;
    }
    
    
    @Override
    public String toString() {
      StringBuilder b = new StringBuilder();
      b.append("'").append(getReaderKey()).append("'=>");
      b.append("'").append(getFieldName()).append("',");
      b.append(getCacheType()).append(",").append(getCustom());
      b.append("=>").append(getValue().getClass().getName()).append("#");
      b.append(System.identityHashCode(getValue()));
      
      String s = getEstimatedSize();
      if(null != s) {
        b.append(" (size =~ ").append(s).append(')');
      }

      return b.toString();
    }
  
  }

  /**
   * EXPERT: Generates an array of CacheEntry objects representing all items 
   * currently in the FieldCache.
   * <p>
   * NOTE: These CacheEntry objects maintain a strong reference to the 
   * Cached Values.  Maintaining references to a CacheEntry the IndexReader 
   * associated with it has garbage collected will prevent the Value itself
   * from being garbage collected when the Cache drops the WeakReference.
   * </p>
   * @lucene.experimental
   */
  public abstract CacheEntry[] getCacheEntries();

  /**
   * <p>
   * EXPERT: Instructs the FieldCache to forcibly expunge all entries 
   * from the underlying caches.  This is intended only to be used for 
   * test methods as a way to ensure a known base state of the Cache 
   * (with out needing to rely on GC to free WeakReferences).  
   * It should not be relied on for "Cache maintenance" in general 
   * application code.
   * </p>
   * @lucene.experimental
   */
  public abstract void purgeAllCaches();

  /**
   * Expert: drops all cache entries associated with this
   * reader.  NOTE: this reader must precisely match the
   * reader that the cache entry is keyed on. If you pass a
   * top-level reader, it usually will have no effect as
   * Lucene now caches at the segment reader level.
   */
  public abstract void purge(IndexReader r);

  /**
   * If non-null, FieldCacheImpl will warn whenever
   * entries are created that are not sane according to
   * {@link org.apache.lucene.util.FieldCacheSanityChecker}.
   */
  public void setInfoStream(PrintStream stream);

  /** counterpart of {@link #setInfoStream(PrintStream)} */
  public PrintStream getInfoStream();
}