// FieldDefinition.java example
//
// Explorer
// lux-master
// - src
package lux.index.field;

import lux.exception.LuxException;
import lux.index.FieldRole;
import lux.index.XmlIndexer;
import lux.query.RangePQuery;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.SortField;
import org.apache.solr.schema.FieldProperties;

/**
 * Represents a field in the index corresponding to some XML content.
 * 
 * A FieldDefinition has a name, which may be configured, but must remain the same for all uses
 * of a single index.
 * 
 * FieldDefinition provides methods for retrieving Lucene field configuration, and for retrieving values
 * from the XmlIndexer to be passed to Lucene.
 * 
 * see {@link lux.index.IndexConfiguration} for a list of current built-in fields.
 */
public abstract class FieldDefinition {

    // Bit flags combined by getSolrFieldProperties().  The names follow the labels that
    // accompany these values in Solr's FieldProperties.
    // NOTE(review): presumably duplicated here because the Solr constants are not
    // accessible from this package - confirm before replacing with direct references.
    private static final int SOLR_INDEXED = 0x1;
    private static final int SOLR_TOKENIZED = 0x2;
    private static final int SOLR_STORED = 0x4;
    private static final int SOLR_OMIT_NORMS = 0x10;
    private static final int SOLR_OMIT_TF_POSITIONS = 0x20;
    private static final int SOLR_MULTIVALUED = 0x200;

    private final FieldRole role;
    
    // the Lucene field name; initialized from the role (when there is one) and
    // changeable via setName()
    private String name;

    // indicate whether assumptions are being made about the name of this field.
    // Some fields are treated in a special way so that the names of the Lucene fields
    // can be altered by configuration (for example so as to be compatible with an 
    // existing schema).  Other fields are not expected to be renamed, and their field
    // names are assumed to always have a certain value.
    private final boolean renameable;
    
    // a collation for ordering strings - placeholder for future implementation
    // private String collation;
    
    /**
     * @return whether this field was declared renameable when it was constructed
     */
    public boolean isRenameable() {
        return renameable;
    }

    /** Represents the type of data fed to the index for a given field.
     * TOKENS-type fields are expected to provide a TokenStream, whereas the
     * other types provide each value as a Java object.  Each constant carries the 
     * Lucene sort field type and the range query term type associated with it.
     */
    public enum Type {
        TOKENS(SortField.Type.DOC, RangePQuery.Type.STRING), 
        STRING(SortField.Type.STRING, RangePQuery.Type.STRING), 
        BYTES(SortField.Type.BYTES, null), 
        INT(SortField.Type.INT, RangePQuery.Type.INT), 
        LONG(SortField.Type.LONG, RangePQuery.Type.LONG), 
        TEXT(SortField.Type.DOC, RangePQuery.Type.STRING),
        SOLR_FIELD(SortField.Type.STRING, RangePQuery.Type.STRING) // TODO: understand the implications here???
        ;
        
        // enum constants are shared, so their state is fixed at construction
        private final SortField.Type sortFieldType;
        private final RangePQuery.Type rangeTermType;
        
        Type (SortField.Type sortFieldType, RangePQuery.Type rangeTermType) {
            this.sortFieldType = sortFieldType;
            this.rangeTermType = rangeTermType;
        }
        
        /**
         * @return the Lucene sort field type associated with this field type
         */
        public SortField.Type getLuceneSortFieldType () {
            return sortFieldType;
        }
        
        /**
         * @return the range query term type associated with this field type; null for BYTES
         */
        public RangePQuery.Type getRangeTermType () {
            return rangeTermType;
        }
    }
    
    private final Type type;    
    
    // an Analyzer for text fields; if null, the field is not indexed
    private Analyzer analyzer;

    // an Analyzer to use for query parsing
    private Analyzer queryAnalyzer;

    private final Store isStored;
    
    /**
     * Represents a Solr/Lucene field
     * @param role the role of the field; may be null if the field has no special role.
     * When non-null, the role's field name becomes the initial name of this field.
     * @param analyzer the analyzer associated with the field.  This will
     * be used to analyze string field values, and to analyze queries.  If
     * the field values are not strings (eg if they are a TokenStream), the
     * analyzer is used only for queries.
     * @param isStored whether the field values are to be stored
     * @param type the type of the field values: STRING, TOKENS, INT.
     * @param renameable whether the field is allowed to be renamed
     * @throws LuxException if an analyzer is supplied for a field whose type is not one of
     * STRING, TEXT, TOKENS or SOLR_FIELD
     */
    public FieldDefinition (FieldRole role, Analyzer analyzer, Store isStored, Type type, boolean renameable) {
        this.role = role;
        if (role != null) {
            this.name = role.getFieldName();
        }
        this.analyzer = analyzer;
        this.isStored = isStored;
        this.type = type;
        this.renameable = renameable;
        // an analyzer is only accepted together with the text-like field types
        if (analyzer != null && ! (type == Type.STRING || type == Type.TEXT || type == Type.TOKENS || type == Type.SOLR_FIELD)) {
            throw new LuxException ("Unexpected combination of analyzer and field " + name + " of type: " + type);
        }
    }
    
    /**
     * construct a field definition fulfilling a specific role known to the indexer and optimizer.
     * The field is not renameable.
     * 
     * @param role the role of the field
     * @param analyzer the analyzer associated with the field.  This will
     * be used to analyze string field values, and to analyze queries.
     * @param isStored whether the field values are to be stored
     * @param type the type of the field values: STRING, TOKENS, INT.
     */
    public FieldDefinition (FieldRole role, Analyzer analyzer, Store isStored, Type type) {
        this (role, analyzer, isStored, type, false);
    }
    
    /**
     * creates a field definition without any special role.  The field has no name until
     * one is assigned using {@link #setName(String)}.
     * @param analyzer the analyzer associated with the field
     * @param isStored whether the field values are to be stored
     * @param type the type of the field values
     */
    public FieldDefinition(Analyzer analyzer, Store isStored, Type type) {
        this (null, analyzer, isStored, type);
    }

    /** Wraps the values as Field, which includes the values and the Lucene indexing options.
     * Subclasses must implement getValues() or override this method
     * @param indexer the indexer that holds the field values
     * @return the accumulated values of the field, as {@link IndexableField}s
     * @throws LuxException if {@link #getValues(XmlIndexer)} returns null
     */
    public Iterable<? extends IndexableField> getFieldValues(XmlIndexer indexer) {
        Iterable<?> values = getValues(indexer);
        if (values == null) {
            throw new LuxException(getClass().getName() + ".getValues() returned null: did you neglect to implement it?");
        }
        return new FieldValues (this, values);
    }


    /** The Solr XmlUpdateProcessor calls this.  If it returns null, the caller should use the values
     * from getFieldValues() instead.
     * @param indexer the indexer that holds the field values
     * @return the accumulated values of the field, as primitive objects (String or Integer), or null
     * if the field does not supply its values this way.  This base implementation always returns null.
     */
    public Iterable<?> getValues (XmlIndexer indexer) {
        return null;
    }
    
    /**
     * @return The type of data stored in the field.
     */
    public Type getType () {
        return type;
    }
    
    /**
     * Sets the analyzer that will be used to process text when indexing this field.  The analyzer will also
     * be used to process query text, unless a specific query analyzer is provided using 
     * {@link #setQueryAnalyzer(Analyzer)}. 
     * @param analyzer the {@link org.apache.lucene.analysis.Analyzer} to set
     */
    public void setAnalyzer (Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * @return the Analyzer used for indexing, and query analysis if no query analyzer is set.
     */
    public Analyzer getAnalyzer() {
        return analyzer;
    }
    
    /**
     * Sets the analyzer that will be used to process text when parsing queries. 
     * @param analyzer the {@link org.apache.lucene.analysis.Analyzer} to set
     */
    public void setQueryAnalyzer (Analyzer analyzer) {
        this.queryAnalyzer = analyzer;
    }

    /**
     * @return the Analyzer used for query parsing. If no specific query analyzer has been set, 
     * returns the index analyzer (the result of {@link #getAnalyzer()}).
     */
    public Analyzer getQueryAnalyzer() {
        return queryAnalyzer == null ? getAnalyzer() : queryAnalyzer;
    }
    
    /**
     * @return the {@link Store} setting this field was constructed with
     */
    public Store isStored() {
        return isStored;
    }
    
    /**
     * @return whether the field holds at most one value; this base implementation returns false
     */
    public boolean isSingleValued () {
        return false;
    }
    
    /**
     * Attempts to guess the Solr field properties (see {@link FieldProperties}) based on the available
     * information. Subclasses may need to override to get the correct behavior.  Norms are omitted from
     * all fields except those of type TOKENS; a field is flagged as multi-valued unless 
     * {@link #isSingleValued()} returns true.
     * @return the Solr field properties to use when creating a Solr Schema field dynamically
     */
    public int getSolrFieldProperties () {
        int options = 0;
        if (type != Type.BYTES) {
            options |= SOLR_INDEXED;
        }
        if (analyzer != null) {
            if (analyzer instanceof KeywordAnalyzer) {
                options |= SOLR_OMIT_TF_POSITIONS;
                //options |= 0x2000;  // OMIT_POSITIONS
            }
            else {
                options |= SOLR_TOKENIZED;
            }
        }
        if (isStored == Field.Store.YES) {
            options |= SOLR_STORED;
        }
        if (!isSingleValued()) {
            options |= SOLR_MULTIVALUED;
        }
        if (type != Type.TOKENS) {
            options |= SOLR_OMIT_NORMS;
        }
        return options;
    }

    /**
     * @return the field name (see {@link #getName()}); null if no name has been assigned
     */
    @Override
    public String toString () {
        return name;
    }

    /** @return An immutable identifier for the field used to refer to it in code */
    public FieldRole getFieldRole() {
        return role;
    }
    
    /** @return The field name as it appears in queries, and in the index.  Defaults to the field name
    * of the role.  The XmlIndexer maintains a list of field names so that these intrinsic names
    * can be overridden by configuration
    */
    public String getName () {
        return name;
    }
    
    /** @param luceneFieldName the name of the Lucene field to associate with this definition
     * @see #getName */
    public void setName(String luceneFieldName) {
        this.name = luceneFieldName;
    }

}

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */