/*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.search.fields;
import org.opencms.search.CmsSearchManager;
import org.opencms.util.CmsStringUtil;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
/**
* An individual field configuration in a search index.<p>
*
* @since 7.0.0
*/
public class CmsSearchField {

    /** The default boost factor (1.0), used in case no boost has been set for a field. */
    public static final float BOOST_DEFAULT = 1.0f;

    /** Name of the field that contains the (optional) category of the document (hardcoded). */
    public static final String FIELD_CATEGORY = "category";

    /** Name of the field that usually contains the complete content of the document (optional). */
    public static final String FIELD_CONTENT = "content";

    /** Name of the field that contains the complete extracted content of the document as serialized object (hardcoded). */
    public static final String FIELD_CONTENT_BLOB = "contentblob";

    /** Name of the field that contains the document content date (hardcoded). */
    public static final String FIELD_DATE_CONTENT = "contentdate";

    /** Name of the field that contains the document creation date (hardcoded). */
    public static final String FIELD_DATE_CREATED = "created";

    /** Name of the field that contains the document creation date for fast lookup (hardcoded). */
    public static final String FIELD_DATE_CREATED_LOOKUP = "created_lookup";

    /** Name of the field that contains the document last modification date (hardcoded). */
    public static final String FIELD_DATE_LASTMODIFIED = "lastmodified";

    /** Name of the field that contains the document last modification date for fast lookup (hardcoded). */
    public static final String FIELD_DATE_LASTMODIFIED_LOOKUP = "lastmodified_lookup";

    /** Name of the field that usually contains the value of the "Description" property of the document (optional). */
    public static final String FIELD_DESCRIPTION = "description";

    /** Name of the field that usually contains the value of the "Keywords" property of the document (optional). */
    public static final String FIELD_KEYWORDS = "keywords";

    /**
     * Name of the field that usually combines all document "meta" information,
     * that is the values of the "Title", "Keywords" and "Description" properties (optional).
     */
    public static final String FIELD_META = "meta";

    /** Name of the field that contains all VFS parent folders of a document (hardcoded). */
    public static final String FIELD_PARENT_FOLDERS = "parent-folders";

    /** Name of the field that contains the document root path in the VFS (hardcoded). */
    public static final String FIELD_PATH = "path";

    /**
     * Name of the field that contains the (optional) document priority,
     * which can be used to boost the document in the result list (hardcoded).
     */
    public static final String FIELD_PRIORITY = "priority";

    /**
     * Name of the field that usually contains the value of the "Title" property of the document
     * as a keyword used for sorting and also for retrieving the title text (optional).<p>
     *
     * Please note: This field should NOT be used for searching. Use {@link #FIELD_TITLE_UNSTORED} instead.<p>
     */
    public static final String FIELD_TITLE = "title-key";

    /**
     * Name of the field that usually contains the value of the "Title" property of the document
     * in an analyzed form used for searching in the title (optional).
     */
    public static final String FIELD_TITLE_UNSTORED = "title";

    /** Name of the field that contains the type of the document. */
    public static final String FIELD_TYPE = "type";

    /** Value returned by {@link #getDisplayName()} if the field should not be displayed. */
    public static final String IGNORE_DISPLAY_NAME = "-";

    /** Constant for the "compress" store setting. */
    public static final String STR_COMPRESS = "compress";

    /** Constant for the "no" index setting. */
    public static final String STR_NO = "no";

    /** Constant for the "tokenized" index setting. */
    public static final String STR_TOKENIZED = "tokenized";

    /** Constant for the "untokenized" index setting. */
    public static final String STR_UN_TOKENIZED = "untokenized";

    /** Constant for the "yes" store setting. */
    public static final String STR_YES = "yes";

    /** The special analyzer to use for this field. */
    private Analyzer m_analyzer;

    /** The boost factor of the field. */
    private float m_boost;

    /** Indicates if the content of this field is compressed. */
    private boolean m_compressed;

    /** A default value for the field in case the content does not provide the value. */
    private String m_defaultValue;

    /** Indicates if this field should be displayed. */
    private boolean m_displayed;

    /** The display name of the field. */
    private String m_displayName;

    /** The display name set from the configuration. */
    private String m_displayNameForConfiguration;

    /** Indicates if this field should be used for generating the excerpt. */
    private boolean m_excerpt;

    /** Indicates if the content of this field should be indexed. */
    private boolean m_indexed;

    /** The search field mappings, never <code>null</code>. */
    private final List<CmsSearchFieldMapping> m_mappings;

    /** The name of the field. */
    private String m_name;

    /** Indicates if the content of this field should be stored. */
    private boolean m_stored;

    /** Indicates if the content of this field should be tokenized. */
    private boolean m_tokenized;

    /**
     * Creates a new search field configuration.<p>
     *
     * The mapping list is empty and the boost is {@link #BOOST_DEFAULT}.<p>
     */
    public CmsSearchField() {
        m_mappings = new ArrayList<CmsSearchFieldMapping>();
        m_boost = BOOST_DEFAULT;
    }

    /**
     * Creates a new search field configuration.<p>
     *
     * The field will be tokenized if it is indexed.
     * The field will not be in the excerpt.
     * The boost value is the default, that is no special boost is used.
     * There is no default value.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isIndexed controls if the field is indexed and tokenized, see {@link #setIndexed(boolean)}
     */
    public CmsSearchField(String name, String displayName, boolean isStored, boolean isIndexed) {
        this(name, displayName, isStored, isIndexed, isIndexed, false, BOOST_DEFAULT, null);
    }

    /**
     * Creates a new search field configuration.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isCompressed controls if the field is compressed, see {@link #setCompressed(boolean)}
     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
     * @param isTokenized controls if the field is tokenized, see {@link #setTokenized(boolean)}
     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
     * @param analyzer the Lucene analyzer to use for this field
     * @param boost the boost factor for the field, see {@link #setBoost(float)}
     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
     */
    public CmsSearchField(
        String name,
        String displayName,
        boolean isStored,
        boolean isCompressed,
        boolean isIndexed,
        boolean isTokenized,
        boolean isInExcerpt,
        Analyzer analyzer,
        float boost,
        String defaultValue) {

        this();
        setDisplayName(displayName);
        setName(name);
        setStored(isStored);
        // note: a "true" compressed flag also switches the stored flag on
        setCompressed(isCompressed);
        setIndexed(isIndexed);
        setTokenized(isTokenized);
        setInExcerpt(isInExcerpt);
        setAnalyzer(analyzer);
        setBoost(boost);
        setDefaultValue(defaultValue);
    }

    /**
     * Creates a new search field configuration that is not compressed and has no special analyzer.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
     * @param isTokenized controls if the field is tokenized, see {@link #setTokenized(boolean)}
     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
     * @param boost the boost factor for the field, see {@link #setBoost(float)}
     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
     */
    public CmsSearchField(
        String name,
        String displayName,
        boolean isStored,
        boolean isIndexed,
        boolean isTokenized,
        boolean isInExcerpt,
        float boost,
        String defaultValue) {

        this(name, displayName, isStored, false, isIndexed, isTokenized, isInExcerpt, null, boost, defaultValue);
    }

    /**
     * Adds a new field mapping to the internal list of mappings.<p>
     *
     * @param mapping the mapping to add
     */
    public void addMapping(CmsSearchFieldMapping mapping) {
        m_mappings.add(mapping);
    }

    /**
     * Creates a Lucene field from the configuration and the provided content.<p>
     *
     * The configured name of the field as provided by {@link #getName()} is used.<p>
     *
     * If no valid content is provided (that is the content is either <code>null</code> or
     * only whitespace), the configured default value is used instead, see {@link #getDefaultValue()}.
     * If there is no default value either, no field is created and <code>null</code> is returned.<p>
     *
     * @param content the content to create the field with
     *
     * @return a Lucene field created from the configuration and the provided content, or <code>null</code>
     */
    public Field createField(String content) {
        return createField(getName(), content);
    }

    /**
     * Creates a Lucene field with the given name from the configuration and the provided content.<p>
     *
     * If no valid content is provided (that is the content is either <code>null</code> or
     * only whitespace), the configured default value is used instead, see {@link #getDefaultValue()}.
     * If there is no default value either, no field is created and <code>null</code> is returned.<p>
     *
     * @param name the name of the field to create
     * @param content the content to create the field with
     *
     * @return a Lucene field with the given name from the configuration and the provided content, or <code>null</code>
     */
    public Field createField(String name, String content) {
        String value = content;
        if (CmsStringUtil.isEmptyOrWhitespaceOnly(value)) {
            // fall back to the configured default, which may be null
            value = getDefaultValue();
        }
        if (value == null) {
            return null;
        }

        Index index = Field.Index.NO;
        if (isIndexed()) {
            index = isTokenizedAndIndexed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
        }

        // a compressed field is created as a stored field, even if the stored flag was reset later
        Field.Store store = (isStored() || isCompressed()) ? Field.Store.YES : Field.Store.NO;

        Field result = new Field(name, value, store, index);
        if (getBoost() != BOOST_DEFAULT) {
            // only touch the Lucene boost if a special boost has been configured
            result.setBoost(getBoost());
        }
        return result;
    }

    /**
     * Two fields are equal if the name of the Lucene field is equal.<p>
     *
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof CmsSearchField) {
            return CmsStringUtil.isEqual(m_name, ((CmsSearchField)obj).m_name);
        }
        return false;
    }

    /**
     * Returns the analyzer used for this field.<p>
     *
     * @return the analyzer used for this field
     */
    public Analyzer getAnalyzer() {
        return m_analyzer;
    }

    /**
     * Returns the boost factor of this field.<p>
     *
     * The boost factor is a Lucene function that controls the "importance" of a field in the
     * search result ranking. The default is <code>1.0</code>. A lower boost factor will make the field
     * less important for the result ranking, a higher value will make it more important.<p>
     *
     * @return the boost factor of this field
     */
    public float getBoost() {
        return m_boost;
    }

    /**
     * Returns the boost factor of this field as String value for display use.<p>
     *
     * @return the boost factor of this field as String value for display use,
     *      or <code>null</code> if the boost is the default {@link #BOOST_DEFAULT}
     */
    public String getBoostDisplay() {
        if (m_boost == BOOST_DEFAULT) {
            return null;
        }
        return String.valueOf(m_boost);
    }

    /**
     * Returns the default value to use if no content for this field was collected.<p>
     *
     * In case no default is configured, <code>null</code> is returned.<p>
     *
     * @return the default value to use if no content for this field was collected
     */
    public String getDefaultValue() {
        return m_defaultValue;
    }

    /**
     * Returns the display name of the field.<p>
     *
     * If the field is not displayed, {@link #IGNORE_DISPLAY_NAME} is returned.
     * If the field is displayed but has no display name, the field name is returned.<p>
     *
     * @return the display name of the field
     */
    public String getDisplayName() {
        if (!isDisplayed()) {
            return IGNORE_DISPLAY_NAME;
        }
        return (m_displayName != null) ? m_displayName : m_name;
    }

    /**
     * Returns the display name as set from the configuration.<p>
     *
     * @return the display name as set from the configuration
     */
    public String getDisplayNameForConfiguration() {
        return m_displayNameForConfiguration;
    }

    /**
     * Returns the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index.<p>
     *
     * The result is <code>"true"</code> for an indexed and tokenized field,
     * {@link #STR_UN_TOKENIZED} for an indexed but not tokenized field,
     * and <code>"false"</code> for a field that is not indexed.<p>
     *
     * @return the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index
     *
     * @see #isTokenizedAndIndexed()
     * @see #isIndexed()
     */
    public String getIndexed() {
        if (isTokenizedAndIndexed()) {
            return String.valueOf(true);
        }
        if (isIndexed()) {
            return STR_UN_TOKENIZED;
        }
        return String.valueOf(false);
    }

    /**
     * Returns the mappings for this field.<p>
     *
     * The returned list is the internal list, it is not a copy.<p>
     *
     * @return the mappings for this field
     */
    public List<CmsSearchFieldMapping> getMappings() {
        return m_mappings;
    }

    /**
     * Returns the name of this field in the Lucene search index.<p>
     *
     * @return the name of this field in the Lucene search index
     */
    public String getName() {
        return m_name;
    }

    /**
     * The hash code for a field is based only on the field name.<p>
     *
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        return (m_name == null) ? 41 : m_name.hashCode();
    }

    /**
     * Returns <code>true</code> if the content of this field is compressed.<p>
     *
     * Setting the compressed flag with {@link #setCompressed(boolean)} also sets the stored flag,
     * and {@link #createField(String, String)} always creates a stored field for a compressed field.<p>
     *
     * @return <code>true</code> if the content of this field is compressed
     */
    public boolean isCompressed() {
        return m_compressed;
    }

    /**
     * Returns <code>true</code> if the field should be displayed.<p>
     *
     * @return <code>true</code> if the field should be displayed, otherwise <code>false</code>
     */
    public boolean isDisplayed() {
        return m_displayed;
    }

    /**
     * Returns <code>true</code> if the content of this field is indexed in the Lucene index.<p>
     *
     * @return <code>true</code> if the content of this field is indexed in the Lucene index
     */
    public boolean isIndexed() {
        return m_indexed;
    }

    /**
     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
     *
     * @return <code>true</code> if this fields content is used in the search result excerpt
     *
     * @see #isStored()
     */
    public boolean isInExcerpt() {
        return m_excerpt;
    }

    /**
     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
     *
     * A field can only be used in the excerpt if it is stored, see {@link #isStored()}.<p>
     *
     * @return <code>true</code> if this fields content is used in the search result excerpt
     *
     * @see #isStored()
     */
    public boolean isInExcerptAndStored() {
        return m_excerpt && m_stored;
    }

    /**
     * Returns <code>true</code> if the content of this field is stored in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store}
     * for the concept behind stored and unstored fields.<p>
     *
     * @return <code>true</code> if the content of this field is stored in the Lucene index
     *
     * @see #isTokenizedAndIndexed()
     */
    public boolean isStored() {
        return m_stored;
    }

    /**
     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Index}
     * for the concept behind tokenized and untokenized fields.<p>
     *
     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
     */
    public boolean isTokenized() {
        return m_tokenized;
    }

    /**
     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
     *
     * A field can only be tokenized if it is also indexed, see {@link #isIndexed()}.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Index}
     * for the concept behind tokenized and untokenized fields.<p>
     *
     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
     *
     * @see #isStored()
     * @see #isIndexed()
     */
    public boolean isTokenizedAndIndexed() {
        return m_tokenized && m_indexed;
    }

    /**
     * Sets the analyzer used for this field.<p>
     *
     * @param analyzer the analyzer to set
     */
    public void setAnalyzer(Analyzer analyzer) {
        m_analyzer = analyzer;
    }

    /**
     * Sets the analyzer used for this field.<p>
     *
     * The parameter must be the name of a class that implements the Lucene {@link Analyzer} interface.<p>
     *
     * @param analyzer the analyzer class name to set
     *
     * @throws Exception in case of problems creating the analyzer class instance
     */
    public void setAnalyzer(String analyzer) throws Exception {
        setAnalyzer(CmsSearchManager.getAnalyzer(analyzer));
    }

    /**
     * Sets the boost factor for this field.<p>
     *
     * The boost factor is a Lucene function that controls the "importance" of a field in the
     * search result ranking. The default is <code>1.0</code>. A lower boost factor will make the field
     * less important for the result ranking, a higher value will make it more important.<p>
     *
     * A negative value is replaced by <code>0.0</code>, and a value that is not a number (NaN)
     * is replaced by {@link #BOOST_DEFAULT}.<p>
     *
     * <b>Use with caution:</b> You should only use this if you fully understand the concept behind
     * Lucene boost factors. Otherwise it is likely that your result rankings will be worse than with
     * the default values.<p>
     *
     * @param boost the boost factor to set
     */
    public void setBoost(float boost) {
        if (Float.isNaN(boost)) {
            // NaN would pass the "< 0" check below, treat it as "no boost configured"
            m_boost = BOOST_DEFAULT;
        } else if (boost < 0.0f) {
            m_boost = 0.0f;
        } else {
            m_boost = boost;
        }
    }

    /**
     * Sets the boost factor for this field from a String value.<p>
     *
     * If the value is <code>null</code>, empty, only whitespace or not a valid number,
     * the default boost factor {@link #BOOST_DEFAULT} is used.<p>
     *
     * @param boost the boost factor to set
     *
     * @see #setBoost(float)
     */
    public void setBoost(String boost) {
        if (CmsStringUtil.isEmptyOrWhitespaceOnly(boost)) {
            // Float.valueOf(null) would throw a NullPointerException, which is not caught below
            setBoost(BOOST_DEFAULT);
            return;
        }
        try {
            setBoost(Float.parseFloat(boost));
        } catch (NumberFormatException e) {
            // invalid number format, use default boost factor
            setBoost(BOOST_DEFAULT);
        }
    }

    /**
     * Sets the boost factor of this field (only for display use).<p>
     *
     * A <code>null</code> value, as returned by {@link #getBoostDisplay()} for the default boost,
     * sets the default boost factor.<p>
     *
     * @param boost the boost factor to set
     *
     * @see #setBoost(String)
     */
    public void setBoostDisplay(String boost) {
        setBoost(boost);
    }

    /**
     * Controls if this field value will be stored compressed or not.<p>
     *
     * If this is set to <code>true</code>, the value for {@link #isStored()} will also
     * be set to <code>true</code>, since compressed fields are always stored.<p>
     *
     * @param compressed if <code>true</code>, the field value will be stored compressed
     */
    public void setCompressed(boolean compressed) {
        m_compressed = compressed;
        if (compressed) {
            setStored(true);
        }
    }

    /**
     * Sets the default value to use if no content for this field was collected.<p>
     *
     * The value is trimmed. A value that is <code>null</code>, empty or only whitespace
     * removes the default value.<p>
     *
     * @param defaultValue the default value to set
     */
    public void setDefaultValue(String defaultValue) {
        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(defaultValue)) {
            m_defaultValue = defaultValue.trim();
        } else {
            m_defaultValue = null;
        }
    }

    /**
     * Controls if the field is displayed or not.<p>
     *
     * @param displayed if <code>true</code> the field is displayed
     */
    public void setDisplayed(boolean displayed) {
        m_displayed = displayed;
    }

    /**
     * Sets the display name.<p>
     *
     * If the given name is empty or equals {@link #IGNORE_DISPLAY_NAME}, the field is not displayed.
     * Otherwise the name is also used as the display name for the configuration and the field is displayed.<p>
     *
     * @param displayName the display name to set
     */
    public void setDisplayName(String displayName) {
        boolean hidden = CmsStringUtil.isEmpty(displayName) || IGNORE_DISPLAY_NAME.equals(displayName);
        if (hidden) {
            m_displayName = null;
        } else {
            m_displayName = displayName;
            m_displayNameForConfiguration = displayName;
        }
        setDisplayed(!hidden);
    }

    /**
     * Sets the display name from the configuration.<p>
     *
     * The given value is always kept as the configuration display name, and is also
     * passed on to {@link #setDisplayName(String)}.<p>
     *
     * @param displayNameForConfiguration the display name from the configuration to set
     */
    public void setDisplayNameForConfiguration(String displayNameForConfiguration) {
        m_displayNameForConfiguration = displayNameForConfiguration;
        setDisplayName(displayNameForConfiguration);
    }

    /**
     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index.<p>
     *
     * @param indexed if <code>true</code>, then the field content is indexed
     *
     * @see #setTokenized(boolean)
     */
    public void setIndexed(boolean indexed) {
        m_indexed = indexed;
    }

    /**
     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index from a String parameter.<p>
     *
     * This sets the values for {@link #isIndexed()} as well as {@link #isTokenizedAndIndexed()}.<p>
     *
     * The parameter is trimmed and compared case insensitive, it can have the following values:
     * <ul>
     * <li><b>"true"</b> or <b>"tokenized"</b>: The field is indexed and tokenized.
     * <li><b>"false"</b> or <b>"no"</b>: The field is not indexed and not tokenized.
     * <li><b>"untokenized"</b>: The field is indexed but not tokenized.
     * </ul>
     * Any other value, including <code>null</code>, is treated like <b>"false"</b>.<p>
     *
     * @param indexed the index setting to use
     *
     * @see #setIndexed(boolean)
     * @see #setTokenized(boolean)
     */
    public void setIndexed(String indexed) {
        boolean isIndexed = false;
        boolean isTokenized = false;
        if (indexed != null) {
            // use a fixed locale, the default locale may map "I" to a non-ASCII character (e.g. Turkish)
            String setting = indexed.trim().toLowerCase(Locale.ENGLISH);
            if (STR_TOKENIZED.equals(setting)) {
                isIndexed = true;
                isTokenized = true;
            } else if (STR_UN_TOKENIZED.equals(setting)) {
                isIndexed = true;
            } else if (!STR_NO.equals(setting)) {
                // only "true" or "false" remain, for "no" both values stay false
                isIndexed = Boolean.parseBoolean(setting);
                isTokenized = isIndexed;
            }
        }
        setIndexed(isIndexed);
        setTokenized(isTokenized);
    }

    /**
     * Controls if this fields content is used in the search result excerpt.<p>
     *
     * @param excerpt if <code>true</code>, then this fields content is used in the search excerpt
     */
    public void setInExcerpt(boolean excerpt) {
        m_excerpt = excerpt;
    }

    /**
     * Controls if this fields content is used in the search result excerpt.<p>
     *
     * @param excerpt if <code>"true"</code> (ignoring case), then this fields content is used in the search excerpt
     *
     * @see #setInExcerpt(boolean)
     */
    public void setInExcerpt(String excerpt) {
        // parseBoolean is null safe, a null value results in false
        setInExcerpt(Boolean.parseBoolean(excerpt));
    }

    /**
     * Sets the name of this field in the Lucene search index.<p>
     *
     * @param name the name to set
     */
    public void setName(String name) {
        m_name = name;
    }

    /**
     * Controls if the content of this field is stored in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store}
     * for the concept behind stored and unstored fields.<p>
     *
     * @param stored if <code>true</code>, then the field content is stored
     *
     * @see #setTokenized(boolean)
     */
    public void setStored(boolean stored) {
        m_stored = stored;
    }

    /**
     * Controls if the content of this field is stored in the Lucene index from a String parameter.<p>
     *
     * This sets the values for {@link #isStored()} as well as {@link #isCompressed()}.<p>
     *
     * The parameter is trimmed and compared case insensitive, it can have the following values:
     * <ul>
     * <li><b>"compress"</b>: The field is stored and compressed.
     * <li><b>"true"</b> or <b>"yes"</b>: The field is stored but not compressed.
     * </ul>
     * Any other value, including <code>null</code>, means the field is neither stored nor compressed.<p>
     *
     * @param stored the store setting to use
     *
     * @see #setStored(boolean)
     * @see #setCompressed(boolean)
     */
    public void setStored(String stored) {
        boolean isStored = false;
        boolean isCompressed = false;
        if (stored != null) {
            // use a fixed locale so the result does not depend on the platform default locale
            String setting = stored.trim().toLowerCase(Locale.ENGLISH);
            if (STR_COMPRESS.equals(setting)) {
                isCompressed = true;
                isStored = true;
            } else if (STR_YES.equals(setting)) {
                // "yes", value will be stored but not compressed
                isStored = true;
            } else {
                // only "true" or "false" remain
                isStored = Boolean.parseBoolean(setting);
            }
        }
        setStored(isStored);
        setCompressed(isCompressed);
    }

    /**
     * Controls if the content of this field is tokenized in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Index}
     * for the concept behind tokenized and untokenized fields.<p>
     *
     * @param tokenized if <code>true</code>, then the field content is tokenized
     *
     * @see #setStored(boolean)
     */
    public void setTokenized(boolean tokenized) {
        m_tokenized = tokenized;
    }
}