/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.core.query.lucene;
import java.math.BigDecimal;
import java.net.URI;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executor;
import javax.jcr.NamespaceException;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
import org.apache.jackrabbit.core.id.NodeId;
import org.apache.jackrabbit.core.id.PropertyId;
import org.apache.jackrabbit.core.state.ChildNodeEntry;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NoSuchItemStateException;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.core.value.InternalValue;
import org.apache.jackrabbit.spi.Name;
import org.apache.jackrabbit.spi.Path;
import org.apache.jackrabbit.spi.commons.conversion.NamePathResolver;
import org.apache.jackrabbit.spi.commons.name.NameConstants;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Creates a lucene <code>Document</code> object from a {@link javax.jcr.Node}.
*/
public class NodeIndexer {
/**
* The logger instance for this class.
*/
private static final Logger log = LoggerFactory.getLogger(NodeIndexer.class);
/**
* The default boost for a lucene field: 1.0f.
*/
protected static final float DEFAULT_BOOST = IndexingConfiguration.DEFAULT_BOOST;
/**
* The <code>NodeState</code> of the node to index.
*/
protected final NodeState node;
/**
* The persistent item state provider. It is used to look up the property
* states and the parent node states of {@link #node}.
*/
protected final ItemStateManager stateProvider;
/**
* Namespace mappings to use for indexing. This is the internal
* namespace mapping.
*/
protected final NamespaceMappings mappings;
/**
* Name and Path resolver, created from {@link #mappings}.
*/
protected final NamePathResolver resolver;
/**
* Background task executor used for full text extraction.
*/
private final Executor executor;
/**
* Parser used for extracting text content from binary properties
* for full text indexing.
*/
private final Parser parser;
/**
* The media types supported by the parser used. Initialized lazily on the
* first call to {@link #isSupportedMediaType(String)}.
*/
private Set<MediaType> supportedMediaTypes;
/**
* The indexing configuration or <code>null</code> if none is available.
*/
protected IndexingConfiguration indexingConfig;
/**
* If set to <code>true</code> the fulltext field is stored and a term
* vector is created with offset information. Defaults to <code>false</code>.
*/
protected boolean supportHighlighting = false;
/**
* Indicates index format for this node indexer. Defaults to
* {@link IndexFormatVersion#V1}.
*/
protected IndexFormatVersion indexFormatVersion = IndexFormatVersion.V1;
/**
* List of {@link FieldNames#FULLTEXT} fields which should not be used in
* an excerpt. The list is cleared at the start of {@link #createDoc()} and
* its content is appended to the document after all other fields.
*/
protected List<Fieldable> doNotUseInExcerpt = new ArrayList<Fieldable>();
/**
* The maximum number of characters to extract from binaries. Defaults to
* {@link Integer#MAX_VALUE}.
*/
private int maxExtractLength = Integer.MAX_VALUE;
/**
 * Creates a new node indexer for a single node state.
 * <p>
 * The name and path resolver of this indexer is built from the given
 * <code>mappings</code> using <code>NamePathResolverImpl.create(mappings)</code>.
 *
 * @param node          the node state to index.
 * @param stateProvider the persistent item state manager to retrieve properties.
 * @param mappings      internal namespace mappings.
 * @param executor      background task executor for text extraction
 * @param parser        parser for binary properties
 */
public NodeIndexer(
        NodeState node, ItemStateManager stateProvider,
        NamespaceMappings mappings, Executor executor, Parser parser) {
    // plain collaborators
    this.executor = executor;
    this.parser = parser;
    this.stateProvider = stateProvider;
    this.node = node;
    // namespace handling: the resolver is derived from the mappings
    this.mappings = mappings;
    this.resolver = NamePathResolverImpl.create(mappings);
}
/**
 * Returns the id of the node this indexer was created for.
 *
 * @return the <code>NodeId</code> of the indexed node.
 */
public NodeId getNodeId() {
    NodeId indexedNodeId = node.getNodeId();
    return indexedNodeId;
}
/**
 * Enables or disables highlighting support. When enabled, additional
 * information is stored in the index to support highlighting using the
 * rep:excerpt pseudo property.
 *
 * @param b <code>true</code> to enable highlighting support.
 */
public void setSupportHighlighting(boolean b) {
    this.supportHighlighting = b;
}
/**
 * Sets the index format version used when creating documents. Several
 * fields are only written starting with a given format version.
 *
 * @param indexFormatVersion the index format version
 */
public void setIndexFormatVersion(IndexFormatVersion indexFormatVersion) {
    IndexFormatVersion requested = indexFormatVersion;
    this.indexFormatVersion = requested;
}
/**
 * Sets the indexing configuration for this node indexer. Passing
 * <code>null</code> makes the indexer fall back to its defaults (all
 * properties indexed, default boosts).
 *
 * @param config the indexing configuration.
 */
public void setIndexingConfiguration(IndexingConfiguration config) {
    indexingConfig = config;
}
/**
 * Returns the upper limit on the number of characters that are extracted
 * from binaries.
 *
 * @return maximum extraction length
 */
public int getMaxExtractLength() {
    return this.maxExtractLength;
}
/**
 * Sets the upper limit on the number of characters that are extracted
 * from binaries.
 *
 * @param length maximum extraction length
 */
public void setMaxExtractLength(int length) {
    maxExtractLength = length;
}
/**
 * Creates the lucene Document for {@link #node}.
 * <p>
 * The document is populated in this order: node boost, id field, parent
 * and node name fields (one set per parent for shareable nodes), the
 * fields of every indexed property, and finally the fulltext fields that
 * must not be used in an excerpt.
 *
 * @return the lucene Document with the index layout.
 * @throws RepositoryException if an error occurs while reading property
 *                             values from the <code>ItemStateProvider</code>.
 */
public Document createDoc() throws RepositoryException {
    doNotUseInExcerpt.clear();
    Document document = new Document();
    document.setBoost(getNodeBoost());

    // special fields, starting with the UUID of the node
    document.add(new IDField(node.getNodeId()));
    try {
        if (node.getParentId() == null) {
            // root node: empty parent UUID and empty node name
            Field rootParent = new Field(FieldNames.PARENT, false, "",
                    Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS,
                    Field.TermVector.NO);
            rootParent.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
            document.add(rootParent);
            addNodeName(document, "", "");
        } else if (node.getSharedSet().isEmpty()) {
            // regular node with exactly one parent
            addParentChildRelation(document, node.getParentId());
        } else {
            // shareable node: one parent/child relation per sharing parent
            for (NodeId sharedParentId : node.getSharedSet()) {
                addParentChildRelation(document, sharedParentId);
            }
            // mark shareable nodes
            document.add(new Field(FieldNames.SHAREABLE_NODE, false, "",
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                    Field.TermVector.NO));
        }
    } catch (NoSuchItemStateException e) {
        throwRepositoryException(e);
    } catch (ItemStateException e) {
        throwRepositoryException(e);
    } catch (NamespaceException e) {
        // will never happen, because this.mappings will dynamically add
        // unknown uri<->prefix mappings
    }

    for (Name propertyName : node.getPropertyNames()) {
        if (!isIndexed(propertyName)) {
            continue;
        }
        PropertyId propertyId = new PropertyId(node.getNodeId(), propertyName);
        try {
            PropertyState state =
                    (PropertyState) stateProvider.getItemState(propertyId);
            // beginning with V2 each property name is added to the
            // _PROPERTIES_SET for searching
            if (indexFormatVersion.getVersion() >= IndexFormatVersion.V2.getVersion()) {
                addPropertyName(document, state.getName());
            }
            InternalValue[] propertyValues = state.getValues();
            for (InternalValue propertyValue : propertyValues) {
                addValue(document, propertyValue, state.getName());
            }
            if (propertyValues.length > 1) {
                // real multi-valued
                addMVPName(document, state.getName());
            }
        } catch (NoSuchItemStateException e) {
            throwRepositoryException(e);
        } catch (ItemStateException e) {
            throwRepositoryException(e);
        }
    }

    // fields that are not used in an excerpt must go at the end
    for (Fieldable excluded : doNotUseInExcerpt) {
        document.add(excluded);
    }
    return document;
}
/**
 * Always throws a <code>RepositoryException</code> that wraps <code>e</code>.
 * The message names the id and the node type name of {@link #node}.
 *
 * @param e the base exception.
 * @throws RepositoryException always; <code>e</code> is set as its cause.
 */
protected void throwRepositoryException(Exception e)
        throws RepositoryException {
    StringBuilder message = new StringBuilder("Error while indexing node: ");
    message.append(node.getNodeId());
    message.append(" of type: ");
    message.append(node.getNodeTypeName());
    throw new RepositoryException(message.toString(), e);
}
/**
 * Marks a property as multi-valued by adding a {@link FieldNames#MVP}
 * field to <code>doc</code>. The field value is <code>name</code> resolved
 * with the internal search index namespace mapping.
 *
 * @param doc  the lucene document.
 * @param name the name of the multi-value property.
 */
protected void addMVPName(Document doc, Name name) {
    String jcrName;
    try {
        jcrName = resolver.getJCRName(name);
    } catch (NamespaceException e) {
        // will never happen, prefixes are created dynamically
        return;
    }
    Field mvpField = new Field(FieldNames.MVP, false, jcrName, Field.Store.NO,
            Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
    doc.add(mvpField);
}
/**
 * Adds a single property value to the lucene Document by dispatching on
 * the type of the value to the matching <code>add...Value</code> method.
 * Afterwards {@link #addValueProperty(Document, InternalValue, Name, String)}
 * is called for the same value.
 *
 * @param doc   the document.
 * @param value the internal jackrabbit value.
 * @param name  the name of the property.
 * @throws RepositoryException if the value cannot be read.
 * @throws IllegalArgumentException if the value has an unknown type.
 */
protected void addValue(Document doc, InternalValue value, Name name) throws RepositoryException {
    // field name is the JCR name; the local name is only a fallback
    String fieldName = name.getLocalName();
    try {
        fieldName = resolver.getJCRName(name);
    } catch (NamespaceException e) {
        // will never happen
    }

    final int type = value.getType();
    switch (type) {
        case PropertyType.BINARY:
            addBinaryValue(doc, fieldName, value);
            break;
        case PropertyType.BOOLEAN:
            addBooleanValue(doc, fieldName, value.getBoolean());
            break;
        case PropertyType.DATE:
            addCalendarValue(doc, fieldName, value.getDate());
            break;
        case PropertyType.DOUBLE:
            addDoubleValue(doc, fieldName, value.getDouble());
            break;
        case PropertyType.LONG:
            addLongValue(doc, fieldName, value.getLong());
            break;
        case PropertyType.REFERENCE:
            addReferenceValue(doc, fieldName, value.getNodeId(), false);
            break;
        case PropertyType.WEAKREFERENCE:
            addReferenceValue(doc, fieldName, value.getNodeId(), true);
            break;
        case PropertyType.PATH:
            addPathValue(doc, fieldName, value.getPath());
            break;
        case PropertyType.URI:
            addURIValue(doc, fieldName, value.getURI());
            break;
        case PropertyType.STRING:
            if (name.equals(NameConstants.JCR_UUID)) {
                // never fulltext index jcr:uuid String
                addStringValue(doc, fieldName, value.getString(),
                        false, false, DEFAULT_BOOST, true);
            } else {
                // fulltext indexed; scope, boost and excerpt usage are configurable
                addStringValue(doc, fieldName, value.getString(),
                        true, isIncludedInNodeIndex(name),
                        getPropertyBoost(name), useInExcerpt(name));
            }
            break;
        case PropertyType.NAME:
            addNameValue(doc, fieldName, value.getName());
            break;
        case PropertyType.DECIMAL:
            addDecimalValue(doc, fieldName, value.getDecimal());
            break;
        default:
            throw new IllegalArgumentException("illegal internal value type: " + value.getType());
    }

    addValueProperty(doc, value, name, fieldName);
}
/**
 * Adds property related meta information for a value to the lucene
 * Document. <br>
 *
 * This implementation adds the length of the value, but only if the index
 * format version is V3 or later.
 *
 * @param doc       the document.
 * @param value     the internal jackrabbit value.
 * @param name      the name of the property (not used by this implementation).
 * @param fieldName the field name under which the length is recorded.
 * @throws RepositoryException declared for overriding implementations.
 */
protected void addValueProperty(Document doc, InternalValue value,
        Name name, String fieldName) throws RepositoryException {
    boolean lengthSupported =
            indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion();
    if (!lengthSupported) {
        return;
    }
    addLength(doc, fieldName, value);
}
/**
 * Records the existence of a property by adding its name to the lucene
 * _:PROPERTIES_SET field.
 *
 * @param doc  the document.
 * @param name the name of the property.
 */
protected void addPropertyName(Document doc, Name name) {
    // the local name is only used if the JCR name cannot be resolved
    String jcrName = name.getLocalName();
    try {
        jcrName = resolver.getJCRName(name);
    } catch (NamespaceException e) {
        // will never happen
    }
    Field propertiesSetField = new Field(FieldNames.PROPERTIES_SET, false, jcrName,
            Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
            Field.TermVector.NO);
    doc.add(propertiesSetField);
}
/**
 * Adds the binary value to the document as a fulltext field.
 * <p>
 * This implementation only handles the <code>jcr:data</code> property (as
 * used by nt:resource); any other binary property is ignored. Text is
 * extracted using the {@link #parser} if the node has a
 * <code>jcr:mimeType</code> that the parser supports. An optional
 * <code>jcr:encoding</code> is passed along as content encoding.
 * <p>
 * Any failure is logged as a warning and otherwise ignored.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addBinaryValue(Document doc,
                              String fieldName,
                              InternalValue internalValue) {
    try {
        // 'check' if node is of type nt:resource
        String dataFieldName = mappings.getPrefix(Name.NS_JCR_URI) + ":data";
        if (!dataFieldName.equals(fieldName)) {
            // don't know how to index
            return;
        }

        InternalValue mimeType = getValue(NameConstants.JCR_MIMETYPE);
        if (mimeType == null || !isSupportedMediaType(mimeType.getString())) {
            return;
        }

        Metadata metadata = new Metadata();
        metadata.set(Metadata.CONTENT_TYPE, mimeType.getString());

        // jcr:encoding is not mandatory
        InternalValue charset = getValue(NameConstants.JCR_ENCODING);
        if (charset != null) {
            metadata.set(Metadata.CONTENT_ENCODING, charset.getString());
        }

        doc.add(createFulltextField(internalValue, metadata, false));
    } catch (Throwable t) {
        // TODO: How to recover from a transient indexing failure?
        log.warn("Exception while indexing binary property", t);
    }
}
/**
 * Looks up the named property on the current node and returns its first
 * value. Returns <code>null</code> if the property does not exist or
 * contains no values.
 *
 * @param name property name
 * @return value of the named property, or <code>null</code>
 * @throws ItemStateException if the property can not be accessed
 */
protected InternalValue getValue(Name name) throws ItemStateException {
    try {
        PropertyState state = (PropertyState) stateProvider.getItemState(
                new PropertyId(node.getNodeId(), name));
        InternalValue[] all = state.getValues();
        return all.length > 0 ? all[0] : null;
    } catch (NoSuchItemStateException e) {
        // a missing property is reported as "no value"
        return null;
    }
}
/**
 * Adds a boolean value to the document as the named field. The value is
 * indexed using its <code>toString()</code> representation.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addBooleanValue(Document doc, String fieldName, Object internalValue) {
    String text = internalValue.toString();
    Field booleanField = createFieldWithoutNorms(fieldName, text, PropertyType.BOOLEAN);
    doc.add(booleanField);
}
/**
 * Creates a {@link FieldNames#PROPERTIES} field holding the named value
 * built from <code>fieldName</code> and <code>internalValue</code>. The
 * created field is indexed without norms.
 * <p>
 * Starting with index format V3 the field is backed by a
 * <code>SingletonTokenStream</code> that also carries the property type;
 * older formats use a plain, not analyzed field.
 *
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 * @param propertyType  the property type.
 * @return the newly created field.
 */
protected Field createFieldWithoutNorms(String fieldName,
                                        String internalValue,
                                        int propertyType) {
    boolean preV3 = indexFormatVersion.getVersion()
            < IndexFormatVersion.V3.getVersion();
    if (preV3) {
        return new Field(FieldNames.PROPERTIES, false,
                FieldNames.createNamedValue(fieldName, internalValue),
                Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                Field.TermVector.NO);
    }
    String namedValue = FieldNames.createNamedValue(fieldName, internalValue);
    Field typedField = new Field(FieldNames.PROPERTIES,
            new SingletonTokenStream(namedValue, propertyType));
    typedField.setOmitNorms(true);
    return typedField;
}
/**
 * Adds a calendar value to the document as the named field. The time in
 * milliseconds of the calendar is converted to an indexable string value
 * using the {@link DateField} class.
 * <p>
 * If the conversion fails with an <code>IllegalArgumentException</code>,
 * a warning is logged and no field is added.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addCalendarValue(Document doc, String fieldName, Calendar internalValue) {
    try {
        String indexable = DateField.timeToString(internalValue.getTimeInMillis());
        doc.add(createFieldWithoutNorms(fieldName, indexable, PropertyType.DATE));
    } catch (IllegalArgumentException e) {
        log.warn("'{}' is outside of supported date value range.",
                internalValue);
    }
}
/**
 * Adds a double value to the document as the named field. The value is
 * converted to an indexable string using the {@link DoubleField} class.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addDoubleValue(Document doc, String fieldName, double internalValue) {
    String indexable = DoubleField.doubleToString(internalValue);
    Field doubleField = createFieldWithoutNorms(fieldName, indexable, PropertyType.DOUBLE);
    doc.add(doubleField);
}
/**
 * Adds a long value to the document as the named field. The value is
 * converted to an indexable string using the {@link LongField} class.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addLongValue(Document doc, String fieldName, long internalValue) {
    String indexable = LongField.longToString(internalValue);
    Field longField = createFieldWithoutNorms(fieldName, indexable, PropertyType.LONG);
    doc.add(longField);
}
/**
 * Adds a decimal value to the document as the named field. The value is
 * converted to an indexable string using the {@link DecimalField} class.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addDecimalValue(Document doc, String fieldName, BigDecimal internalValue) {
    String indexable = DecimalField.decimalToString(internalValue);
    Field decimalField = createFieldWithoutNorms(fieldName, indexable, PropertyType.DECIMAL);
    doc.add(decimalField);
}
/**
 * Adds a reference value to the document as the named field. The string
 * representation of the node id is used as the reference data. It is
 * added twice to the {@link FieldNames#PROPERTIES} field: once indexed
 * (without norms) and once stored but not indexed. For weak references
 * the id is additionally added as a {@link FieldNames#WEAK_REFS} field.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 * @param weak          Flag indicating whether it's a WEAKREFERENCE (true)
 *                      or a REFERENCE (false)
 */
protected void addReferenceValue(Document doc, String fieldName, NodeId internalValue, boolean weak) {
    String targetId = internalValue.toString();
    int referenceType = weak ? PropertyType.WEAKREFERENCE : PropertyType.REFERENCE;

    // indexed variant, used for searching
    doc.add(createFieldWithoutNorms(fieldName, targetId, referenceType));

    // stored variant, not indexed
    String namedValue = FieldNames.createNamedValue(fieldName, targetId);
    doc.add(new Field(FieldNames.PROPERTIES, false, namedValue,
            Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

    if (!weak) {
        return;
    }
    doc.add(new Field(FieldNames.WEAK_REFS, false, targetId,
            Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
            Field.TermVector.NO));
}
/**
 * Adds a path value to the document as the named field. The path is
 * converted to its JCR path string using the {@link #resolver}; the
 * <code>toString()</code> form of the path serves as a fallback.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addPathValue(Document doc, String fieldName, Path internalValue) {
    String jcrPath = internalValue.toString();
    try {
        jcrPath = resolver.getJCRPath(internalValue);
    } catch (NamespaceException e) {
        // will never happen
    }
    Field pathField = createFieldWithoutNorms(fieldName, jcrPath, PropertyType.PATH);
    doc.add(pathField);
}
/**
 * Adds a URI value to the document as the named field. The URI is indexed
 * using its <code>toString()</code> representation.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addURIValue(Document doc, String fieldName, URI internalValue) {
    String text = internalValue.toString();
    Field uriField = createFieldWithoutNorms(fieldName, text, PropertyType.URI);
    doc.add(uriField);
}
/**
 * Adds the string value to the document both as the named field and for
 * full text indexing. The value is tokenized, included in the node scope
 * index, indexed with {@link #DEFAULT_BOOST} and may be used in an excerpt.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 * @deprecated Use {@link #addStringValue(Document, String, String, boolean)}
 *             instead.
 */
@Deprecated
protected void addStringValue(Document doc, String fieldName, String internalValue) {
    addStringValue(doc, fieldName, internalValue, true, true, DEFAULT_BOOST, true);
}
/**
 * Adds the string value to the document as the named field and, if
 * <code>tokenized</code> is <code>true</code>, also for full text
 * indexing. The value is included in the node scope index, indexed with
 * {@link #DEFAULT_BOOST} and may be used in an excerpt.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 * @param tokenized     If <code>true</code> the string is also tokenized
 *                      and fulltext indexed.
 */
protected void addStringValue(Document doc, String fieldName,
                              String internalValue, boolean tokenized) {
    final boolean includeInNodeIndex = true;
    final boolean useInExcerpt = true;
    addStringValue(doc, fieldName, internalValue, tokenized,
            includeInNodeIndex, DEFAULT_BOOST, useInExcerpt);
}
/**
 * Adds the string value to the document both as the named field and
 * optionally for full text indexing if <code>tokenized</code> is
 * <code>true</code>. The value may always be used in an excerpt.
 *
 * @param doc                The document to which to add the field
 * @param fieldName          The name of the field to add
 * @param internalValue      The value for the field to add to the
 *                           document.
 * @param tokenized          If <code>true</code> the string is also
 *                           tokenized and fulltext indexed.
 * @param includeInNodeIndex If <code>true</code> the string is also
 *                           tokenized and added to the node scope fulltext
 *                           index.
 * @param boost              the boost value for this string field.
 * @deprecated use {@link #addStringValue(Document, String, String, boolean, boolean, float, boolean)} instead.
 */
@Deprecated
protected void addStringValue(Document doc, String fieldName,
                              String internalValue, boolean tokenized,
                              boolean includeInNodeIndex, float boost) {
    addStringValue(doc, fieldName, internalValue, tokenized, includeInNodeIndex, boost, true);
}
/**
 * Adds the string value to the document as the named field and optionally
 * for full text indexing.
 * <p>
 * The plain (untokenized) field is always added. If <code>tokenized</code>
 * is <code>true</code> and the value is not empty, a property scoped
 * fulltext field is added as well; its name is the field name with
 * {@link FieldNames#FULLTEXT_PREFIX} inserted in front of the local part.
 * Norms are only kept when <code>boost</code> differs from
 * {@link #DEFAULT_BOOST}.
 *
 * @param doc                The document to which to add the field
 * @param fieldName          The name of the field to add
 * @param internalValue      The value for the field to add to the
 *                           document.
 * @param tokenized          If <code>true</code> the string is also
 *                           tokenized and fulltext indexed.
 * @param includeInNodeIndex If <code>true</code> the string is also
 *                           tokenized and added to the node scope fulltext
 *                           index.
 * @param boost              the boost value for this string field.
 * @param useInExcerpt       If <code>true</code> the string may show up in
 *                           an excerpt.
 */
protected void addStringValue(Document doc, String fieldName,
                              String internalValue, boolean tokenized,
                              boolean includeInNodeIndex, float boost,
                              boolean useInExcerpt) {
    // simple String
    doc.add(createFieldWithoutNorms(fieldName, internalValue,
            PropertyType.STRING));

    // nothing more to do without fulltext indexing or without content
    if (!tokenized || internalValue.length() == 0) {
        return;
    }

    // create fulltext index on property
    int colon = fieldName.indexOf(':');
    String fulltextName = fieldName.substring(0, colon + 1)
            + FieldNames.FULLTEXT_PREFIX + fieldName.substring(colon + 1);
    boolean withNorms = boost != DEFAULT_BOOST;
    Field propertyFulltext = new Field(fulltextName, true, internalValue,
            Field.Store.NO,
            withNorms ? Field.Index.ANALYZED : Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.NO);
    propertyFulltext.setBoost(boost);
    doc.add(propertyFulltext);

    if (!includeInNodeIndex) {
        return;
    }

    // also create fulltext index of this value in the node scope
    boolean stored = supportHighlighting && useInExcerpt;
    Field nodeFulltext = createFulltextField(
            internalValue, stored, supportHighlighting, withNorms);
    if (useInExcerpt) {
        doc.add(nodeFulltext);
    } else {
        // deferred: createDoc() appends these after all other fields
        doNotUseInExcerpt.add(nodeFulltext);
    }
}
/**
 * Adds a name value to the document as the named field. The indexed
 * string is <code>prefix:localName</code>, where the prefix is looked up
 * for the namespace URI of the name in the name space mappings with which
 * this class has been created.
 *
 * @param doc           The document to which to add the field
 * @param fieldName     The name of the field to add
 * @param internalValue The value for the field to add to the document.
 */
protected void addNameValue(Document doc, String fieldName, Name internalValue) {
    try {
        String prefix = mappings.getPrefix(internalValue.getNamespaceURI());
        String prefixedName = prefix + ":" + internalValue.getLocalName();
        Field nameField = createFieldWithoutNorms(fieldName, prefixedName,
                PropertyType.NAME);
        doc.add(nameField);
    } catch (NamespaceException e) {
        // will never happen
    }
}
/**
 * Creates a fulltext field for the string <code>value</code>. The field
 * is stored and gets a term vector with offsets if
 * {@link #supportHighlighting} is enabled.
 *
 * @param value the string value.
 * @return a lucene field.
 * @deprecated use {@link #createFulltextField(String, boolean, boolean, boolean)} instead.
 */
@Deprecated
protected Field createFulltextField(String value) {
    return createFulltextField(value, supportHighlighting, supportHighlighting);
}
/**
 * Creates a fulltext field for the string <code>value</code>. Norm
 * information is always added by this variant.
 *
 * @param value       the string value.
 * @param store       if the value of the field should be stored.
 * @param withOffsets if a term vector with offsets should be stored.
 * @return a lucene field.
 * @deprecated use {@link #createFulltextField(String, boolean, boolean, boolean)} instead.
 */
@Deprecated
protected Field createFulltextField(String value,
                                    boolean store,
                                    boolean withOffsets) {
    return createFulltextField(value, store, withOffsets, true);
}
/**
 * Creates a {@link FieldNames#FULLTEXT} field for the string
 * <code>value</code>. The field is always analyzed.
 *
 * @param value       the string value.
 * @param store       if the value of the field should be stored.
 * @param withOffsets if a term vector with offsets should be stored.
 * @param withNorms   if norm information should be added for this value
 * @return a lucene field.
 */
protected Field createFulltextField(String value,
                                    boolean store,
                                    boolean withOffsets,
                                    boolean withNorms) {
    Field.TermVector termVector = withOffsets
            ? Field.TermVector.WITH_OFFSETS
            : Field.TermVector.NO;
    Field.Index indexMode = withNorms
            ? Field.Index.ANALYZED
            : Field.Index.ANALYZED_NO_NORMS;
    // We would be able to store the field compressed or not depending
    // on a criterion but then we could not determine later if this field
    // has been compressed or not, so we choose to store it uncompressed
    Field.Store storeMode = store ? Field.Store.YES : Field.Store.NO;
    return new Field(FieldNames.FULLTEXT, false, value, storeMode,
            indexMode, termVector);
}
/**
 * Creates a fulltext field for the binary <code>value</code>. Norm
 * information is always added by this variant.
 *
 * @param value    the binary value
 * @param metadata document metadata
 * @return a lucene field.
 * @deprecated use {@link #createFulltextField(InternalValue, Metadata, boolean)} instead.
 */
@Deprecated
protected Fieldable createFulltextField(
        InternalValue value, Metadata metadata) {
    return createFulltextField(value, metadata, true);
}
/**
 * Creates a fulltext field for the binary <code>value</code>. The
 * returned field is a <code>LazyTextExtractorField</code> configured with
 * the {@link #parser}, the {@link #executor}, the current highlighting
 * setting and the maximum extraction length of this indexer.
 *
 * @param value     the binary value
 * @param metadata  document metadata
 * @param withNorms if norm information should be added for this value
 * @return a lucene field.
 */
protected Fieldable createFulltextField(
        InternalValue value, Metadata metadata, boolean withNorms) {
    Fieldable lazyField = new LazyTextExtractorField(
            parser, value, metadata, executor,
            supportHighlighting, getMaxExtractLength(), withNorms);
    return lazyField;
}
/**
 * Decides whether the property with the given name is indexed. Without
 * an indexing configuration every property is indexed. The
 * <code>jcr:primaryType</code> and <code>jcr:mixinTypes</code> properties
 * are always indexed for correct node type resolution in queries. For
 * all other properties the indexing configuration decides.
 *
 * @param propertyName name of a property.
 * @return <code>true</code> if the property should be indexed;
 *         <code>false</code> otherwise.
 */
protected boolean isIndexed(Name propertyName) {
    if (indexingConfig == null) {
        // no configuration: index everything
        return true;
    }
    if (propertyName.equals(NameConstants.JCR_PRIMARYTYPE)) {
        return true;
    }
    if (propertyName.equals(NameConstants.JCR_MIXINTYPES)) {
        return true;
    }
    return indexingConfig.isIndexed(node, propertyName);
}
/**
 * Decides whether the property with the given name is also added to the
 * node scope index. Without an indexing configuration this is always the
 * case.
 *
 * @param propertyName the name of a property.
 * @return <code>true</code> if it should be added to the node scope index;
 *         <code>false</code> otherwise.
 */
protected boolean isIncludedInNodeIndex(Name propertyName) {
    return indexingConfig == null
            || indexingConfig.isIncludedInNodeScopeIndex(node, propertyName);
}
/**
 * Decides whether the content of the property with the given name should
 * be used to create an excerpt. Without an indexing configuration this is
 * always the case.
 *
 * @param propertyName the name of a property.
 * @return <code>true</code> if it should be used to create an excerpt;
 *         <code>false</code> otherwise.
 */
protected boolean useInExcerpt(Name propertyName) {
    return indexingConfig == null
            || indexingConfig.useInExcerpt(node, propertyName);
}
/**
 * Checks whether the provided type is among the types supported by the
 * Tika parser we are using. The set of supported types is requested from
 * the parser on first use and then reused.
 *
 * @param type the type to check.
 * @return whether the type is supported by the Tika parser we are using.
 */
protected boolean isSupportedMediaType(final String type) {
    if (supportedMediaTypes == null) {
        // first call: ask the parser once and remember the answer
        supportedMediaTypes = parser.getSupportedTypes(new ParseContext());
    }
    MediaType requested = MediaType.parse(type);
    return supportedMediaTypes.contains(requested);
}
/**
 * Returns the boost value for the given property name. Without an
 * indexing configuration this is {@link #DEFAULT_BOOST}.
 *
 * @param propertyName the name of a property.
 * @return the boost value for the given property name.
 */
protected float getPropertyBoost(Name propertyName) {
    return indexingConfig == null
            ? DEFAULT_BOOST
            : indexingConfig.getPropertyBoost(node, propertyName);
}
/**
 * Returns the boost value for the indexed node. Without an indexing
 * configuration this is {@link #DEFAULT_BOOST}.
 *
 * @return the boost value for this {@link #node} state.
 */
protected float getNodeBoost() {
    return indexingConfig == null
            ? DEFAULT_BOOST
            : indexingConfig.getNodeBoost(node);
}
/**
 * Adds a {@link FieldNames#PROPERTY_LENGTHS} field to <code>doc</code>
 * with a named length value. Nothing is added when
 * <code>Util.getLength(value)</code> returns <code>-1</code>.
 *
 * @param doc          the lucene document.
 * @param propertyName the property name.
 * @param value        the internal value.
 */
protected void addLength(Document doc,
                         String propertyName,
                         InternalValue value) {
    long valueLength = Util.getLength(value);
    if (valueLength == -1) {
        return;
    }
    String namedLength = FieldNames.createNamedLength(propertyName, valueLength);
    doc.add(new Field(FieldNames.PROPERTY_LENGTHS, false, namedLength,
            Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
            Field.TermVector.NO));
}
/**
 * Adds the node name to the document. A {@link FieldNames#LABEL} field
 * with the prefixed name is always added. As of index format V3 the
 * namespace URI and the local name are additionally indexed in separate
 * fields.
 *
 * @param doc          the lucene document.
 * @param namespaceURI the namespace URI of the node name.
 * @param localName    the local name of the node.
 * @throws NamespaceException if the prefix for the namespace URI cannot
 *                            be looked up.
 */
protected void addNodeName(Document doc,
                           String namespaceURI,
                           String localName) throws NamespaceException {
    String prefixedName = mappings.getPrefix(namespaceURI) + ":" + localName;
    doc.add(new Field(FieldNames.LABEL, false, prefixedName, Field.Store.NO,
            Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));

    boolean preV3 = indexFormatVersion.getVersion() < IndexFormatVersion.V3.getVersion();
    if (preV3) {
        return;
    }
    // as of version 3, also index combination of namespace URI and local name
    Field uriField = new Field(FieldNames.NAMESPACE_URI, false, namespaceURI,
            Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
            Field.TermVector.NO);
    doc.add(uriField);
    Field localNameField = new Field(FieldNames.LOCAL_NAME, false, localName,
            Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
            Field.TermVector.NO);
    doc.add(localNameField);
}
/**
 * Adds a parent child relation to the given <code>doc</code>: a stored
 * {@link FieldNames#PARENT} field with the parent id, followed by the
 * node name fields. The name is taken from the child node entry that the
 * parent holds for the current node.
 *
 * @param doc      the document.
 * @param parentId the id of the parent node.
 * @throws ItemStateException  if the parent node cannot be read.
 * @throws RepositoryException if the parent node does not have a child node
 *                             entry for the current node.
 */
protected void addParentChildRelation(Document doc,
                                      NodeId parentId)
        throws ItemStateException, RepositoryException {
    // the parent field is added before the parent state is read
    Field parentIdField = new Field(FieldNames.PARENT, false,
            parentId.toString(), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
    parentIdField.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
    doc.add(parentIdField);

    NodeState parentState = (NodeState) stateProvider.getItemState(parentId);
    ChildNodeEntry entry = parentState.getChildNodeEntry(node.getNodeId());
    if (entry == null) {
        // this can only happen when jackrabbit
        // is running in a cluster.
        throw new RepositoryException(
                "Missing child node entry for node with id: "
                + node.getNodeId());
    }

    Name childName = entry.getName();
    addNodeName(doc, childName.getNamespaceURI(), childName.getLocalName());
}
}