package lux.index.field;
import lux.exception.LuxException;
import lux.index.FieldRole;
import lux.index.XmlIndexer;
import lux.query.RangePQuery;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.SortField;
import org.apache.solr.schema.FieldProperties;
/**
 * Represents a field in the index corresponding to some XML content.
 *
 * A FieldDefinition has a name, which may be configured, but must remain the same for all uses
 * of a single index.
 *
 * FieldDefinition provides methods for retrieving Lucene field configuration, and for retrieving values
 * from the XmlIndexer to be passed to Lucene.
 *
 * See {@link lux.index.IndexConfiguration} for a list of current built-in fields.
 */
public abstract class FieldDefinition {

    // Bit flags used to assemble the value returned by getSolrFieldProperties().
    // The names and values are the ones this class has always used for the Solr
    // field-property bitmask (see FieldProperties).
    // NOTE(review): presumably these match the Solr version on the classpath - confirm on upgrade.
    private static final int SOLR_INDEXED = 0x1;
    private static final int SOLR_TOKENIZED = 0x2;
    private static final int SOLR_STORED = 0x4;
    private static final int SOLR_OMIT_NORMS = 0x10;
    private static final int SOLR_OMIT_TF_POSITIONS = 0x20;
    private static final int SOLR_MULTIVALUED = 0x200;

    private final FieldRole role;

    // the Lucene field name; initialized from the role (if any) and replaceable via setName()
    private String name;

    // indicate whether assumptions are being made about the name of this field.
    // Some fields are treated in a special way so that the names of the Lucene fields
    // can be altered by configuration (for example so as to be compatible with an
    // existing schema). Other fields are not expected to be renamed, and their field
    // names are assumed to always have certain value.
    private final boolean renameable;

    // a collation for ordering strings - placeholder for future implementation
    // private String collation;

    /**
     * @return whether this field is allowed to be renamed, as given to the constructor
     */
    public boolean isRenameable() {
        return renameable;
    }

    /**
     * Represents the type of data fed to the index for a given field.
     * TOKENS-type fields are expected to provide a TokenStream, whereas the
     * other types provide each value as a Java object.
     *
     * Each constant carries the Lucene sort field type and the range query term type
     * associated with it.
     */
    public enum Type {
        TOKENS(SortField.Type.DOC, RangePQuery.Type.STRING),
        STRING(SortField.Type.STRING, RangePQuery.Type.STRING),
        BYTES(SortField.Type.BYTES, null),
        INT(SortField.Type.INT, RangePQuery.Type.INT),
        LONG(SortField.Type.LONG, RangePQuery.Type.LONG),
        TEXT(SortField.Type.DOC, RangePQuery.Type.STRING),
        SOLR_FIELD(SortField.Type.STRING, RangePQuery.Type.STRING); // TODO: understand the implications here???

        // enum constants are shared, so their state is fixed at construction
        private final SortField.Type sortFieldType;
        private final RangePQuery.Type rangeTermType;

        Type(SortField.Type sortFieldType, RangePQuery.Type rangeTermType) {
            this.sortFieldType = sortFieldType;
            this.rangeTermType = rangeTermType;
        }

        /**
         * @return the Lucene sort field type associated with this field type
         */
        public SortField.Type getLuceneSortFieldType() {
            return sortFieldType;
        }

        /**
         * @return the range query term type associated with this field type; null for BYTES
         */
        public RangePQuery.Type getRangeTermType() {
            return rangeTermType;
        }
    }

    private final Type type;

    // an Analyzer for text fields; if null, the field is not indexed
    private Analyzer analyzer;

    // an Analyzer to use for query parsing
    private Analyzer queryAnalyzer;

    private final Store isStored;

    /**
     * Represents a Solr/Lucene field
     * @param role the role of the field; may be null if the field has no special role.
     * When non-null, the role supplies the initial field name.
     * @param analyzer the analyzer associated with the field. This will
     * be used to analyze string field values, and to analyze queries. If
     * the field values are not strings (eg if they are a TokenStream), the
     * analyzer is used only for queries.
     * @param isStored whether the field values are to be stored
     * @param type the type of the field values: STRING, TOKENS, INT.
     * @param renameable whether the field is allowed to be renamed
     * @throws LuxException if an analyzer is supplied for a type other than
     * STRING, TEXT, TOKENS or SOLR_FIELD
     */
    public FieldDefinition(FieldRole role, Analyzer analyzer, Store isStored, Type type, boolean renameable) {
        this.role = role;
        if (role != null) {
            this.name = role.getFieldName();
        }
        this.analyzer = analyzer;
        this.isStored = isStored;
        this.type = type;
        this.renameable = renameable;
        // analyzers only make sense for the textual field types
        if (analyzer != null && !(type == Type.STRING || type == Type.TEXT || type == Type.TOKENS || type == Type.SOLR_FIELD)) {
            throw new LuxException("Unexpected combination of analyzer and field " + name + " of type: " + type);
        }
    }

    /**
     * construct a field definition fulfilling a specific role known to the indexer and optimizer.
     * The resulting field is not renameable.
     *
     * @param role the role of the field
     * @param analyzer the analyzer associated with the field. This will
     * be used to analyze string field values, and to analyze queries.
     * @param isStored whether the field values are to be stored
     * @param type the type of the field values: STRING, TOKENS, INT.
     */
    public FieldDefinition(FieldRole role, Analyzer analyzer, Store isStored, Type type) {
        this(role, analyzer, isStored, type, false);
    }

    /**
     * creates a field definition without any special role. The field has no name until
     * {@link #setName(String)} is called.
     * @param analyzer the analyzer associated with the field
     * @param isStored whether the field values are to be stored
     * @param type the type of the field values
     */
    public FieldDefinition(Analyzer analyzer, Store isStored, Type type) {
        this(null, analyzer, isStored, type);
    }

    /** Wraps the values as Field, which includes the values and the Lucene indexing options.
     * Subclasses must implement getValues() or override this method
     * @param indexer the indexer that holds the field values
     * @return the accumulated values of the field, as {@link IndexableField}s
     * @throws LuxException if {@link #getValues(XmlIndexer)} returns null
     */
    public Iterable<? extends IndexableField> getFieldValues(XmlIndexer indexer) {
        Iterable<?> values = getValues(indexer);
        if (values == null) {
            throw new LuxException(getClass().getName() + ".getValues() returned null: did you neglect to implement it?");
        }
        return new FieldValues(this, values);
    }

    /** The Solr XmlUpdateProcessor calls this. If it returns null, the caller should use the values
     * from getFieldValues() instead.
     * @param indexer the indexer that holds the field values
     * @return the accumulated values of the field, as primitive objects (String or Integer).
     * This base implementation always returns null.
     */
    public Iterable<?> getValues(XmlIndexer indexer) {
        return null;
    }

    /**
     * @return The type of data stored in the field.
     */
    public Type getType() {
        return type;
    }

    /**
     * Sets the analyzer that will be used to process text when indexing this field. The analyzer will also
     * be used to process query text, unless a specific query analyzer is provided using #setQueryAnalyzer.
     * Unlike the constructor, this setter does not check the analyzer against the field type.
     * @param analyzer the {@link org.apache.lucene.analysis.Analyzer} to set
     */
    public void setAnalyzer(Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * @return the Analyzer used for indexing, and query analysis if no query analyzer is set.
     */
    public Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Sets the analyzer that will be used to process text when parsing queries.
     * @param analyzer the {@link org.apache.lucene.analysis.Analyzer} to set
     */
    public void setQueryAnalyzer(Analyzer analyzer) {
        this.queryAnalyzer = analyzer;
    }

    /**
     * @return the Analyzer used for query parsing. If no specific query analyzer has been set,
     * returns the index analyzer (the result of {@link #getAnalyzer()}).
     */
    public Analyzer getQueryAnalyzer() {
        return queryAnalyzer == null ? getAnalyzer() : queryAnalyzer;
    }

    /**
     * @return the Lucene {@link Store} setting given to the constructor
     */
    public Store isStored() {
        return isStored;
    }

    /**
     * @return whether the field holds at most one value per document. This base implementation
     * returns false; subclasses may override.
     */
    public boolean isSingleValued() {
        return false;
    }

    /**
     * Attempts to guess the Solr field properties (see {@link FieldProperties}) based on the available
     * information. Subclasses may need to override to get the correct behavior. Norms are omitted from
     * all fields except those of type TOKENS; a field is marked multi-valued unless
     * {@link #isSingleValued()} returns true.
     * @return the Solr field properties to use when creating a Solr Schema field dynamically
     */
    public int getSolrFieldProperties() {
        int options = 0;
        if (type != Type.BYTES) {
            options |= SOLR_INDEXED;
        }
        if (analyzer != null) {
            if (analyzer instanceof KeywordAnalyzer) {
                // a KeywordAnalyzer marks the field as untokenized, without term frequencies/positions;
                // the original also carried a disabled OMIT_POSITIONS (0x2000) flag here, which remains unset
                options |= SOLR_OMIT_TF_POSITIONS;
            } else {
                options |= SOLR_TOKENIZED;
            }
        }
        if (isStored == Field.Store.YES) {
            options |= SOLR_STORED;
        }
        if (!isSingleValued()) {
            options |= SOLR_MULTIVALUED;
        }
        if (type != Type.TOKENS) {
            options |= SOLR_OMIT_NORMS;
        }
        return options;
    }

    /**
     * @return the field name (see {@link #getName()}); this is null if no name has been assigned
     */
    @Override
    public String toString() {
        return name;
    }

    /** @return An immutable identifier for the field used to refer to it in code; null if the
     * field was created without a role
     */
    public FieldRole getFieldRole() {
        return role;
    }

    /** @return The field name as it appears in queries, and in the index. Defaults to the FieldName.
     * the XmlIndexer maintains a list of field names so that these intrinsic names
     * can be overridden by configuration
     */
    public String getName() {
        return name;
    }

    /** @param luceneFieldName the name of the Lucene field to associate with this definition
     * @see #getName */
    public void setName(String luceneFieldName) {
        this.name = luceneFieldName;
    }
}
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */