FieldScoreQuery.java example

Explorer
pylucene-master
package org.apache.lucene.search.function;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A query that scores each document as the value of the numeric input field.
 * <p> 
 * The query matches all documents, and scores each document according to the numeric 
 * value of that field. 
 * <p>
 * It is assumed, and expected, that:
 * <ul>
 *  <li>The field used here is indexed, and has exactly 
 *      one token in every scored document.</li> 
 *  <li>Best if this field is un_tokenized.</li>
 *  <li>That token is parseable to the selected type.</li>
 * </ul>
 * <p>  
 * Combining this query in a FunctionQuery allows much freedom in affecting document scores.
 * Note, that with this freedom comes responsibility: it is more than likely that the
 * default Lucene scoring is superior in quality to scoring modified as explained here.
 * However, in some cases, and certainly for research experiments, this capability may turn useful.
 * <p>
 * When constructing this query, select the appropriate type. That type should match the data stored in the
 * field. So in fact the "right" type should be selected before indexing. Type selection
 * has effect on the RAM usage: 
 * <ul>
 *   <li>{@link Type#BYTE} consumes 1 * maxDocs bytes.</li>
 *   <li>{@link Type#SHORT} consumes 2 * maxDocs bytes.</li>
 *   <li>{@link Type#INT} consumes 4 * maxDocs bytes.</li>
 *   <li>{@link Type#FLOAT} consumes 8 * maxDocs bytes.</li>
 * </ul>
 * <p>
 * <b>Caching:</b>
 * Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader. 
 * To take advantage of this, it is extremely important to reuse index-readers or index-searchers, 
 * otherwise, for instance if for each query a new index reader is opened, large penalties would be 
 * paid for loading the field values into memory over and over again!
 * 
 * @lucene.experimental
 */
public class FieldScoreQuery extends ValueSourceQuery {

  /**
   * Type of score field, indicating how field values are interpreted/parsed.  
   * <p>
   * The type selected at search search time should match the data stored in the field. 
   * Different types have different RAM requirements: 
   * <ul>
   *   <li>{@link #BYTE} consumes 1 * maxDocs bytes.</li>
   *   <li>{@link #SHORT} consumes 2 * maxDocs bytes.</li>
   *   <li>{@link #INT} consumes 4 * maxDocs bytes.</li>
   *   <li>{@link #FLOAT} consumes 8 * maxDocs bytes.</li>
   * </ul>
   */
  public static class Type {
    
    /** field values are interpreted as numeric byte values. */
    public static final Type BYTE = new Type("byte"); 

    /** field values are interpreted as numeric short values. */
    public static final Type SHORT = new Type("short"); 

    /** field values are interpreted as numeric int values. */
    public static final Type INT = new Type("int"); 

    /** field values are interpreted as numeric float values. */
    public static final Type FLOAT = new Type("float"); 

    private String typeName;
    private Type (String name) {
      this.typeName = name;
    }
    /*(non-Javadoc) @see java.lang.Object#toString() */
    @Override
    public String toString() {
      return getClass().getName()+"::"+typeName;
    }
  }
  
  /**
   * Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field.
   * <p>
   * The <code>type</code> param tells how to parse the field string values into a numeric score value.
   * @param field the numeric field to be used.
   * @param type the type of the field: either
   * {@link Type#BYTE}, {@link Type#SHORT}, {@link Type#INT}, or {@link Type#FLOAT}. 
   */
  public FieldScoreQuery(String field, Type type) {
    super(getValueSource(field,type));
  }

  // create the appropriate (cached) field value source.  
  private static ValueSource getValueSource(String field, Type type) {
    if (type == Type.BYTE) {
      return new ByteFieldSource(field);
    }
    if (type == Type.SHORT) {
      return new ShortFieldSource(field);
    }
    if (type == Type.INT) {
      return new IntFieldSource(field);
    }
    if (type == Type.FLOAT) {
      return new FloatFieldSource(field);
    }
    throw new IllegalArgumentException(type+" is not a known Field Score Query Type!");
  }

}