NodeIndexer.java example

Explorer
jackrabbit-master
- jackrabbit-trunk
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;

import java.math.BigDecimal;
import java.net.URI;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executor;

import javax.jcr.NamespaceException;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;

import org.apache.jackrabbit.core.id.NodeId;
import org.apache.jackrabbit.core.id.PropertyId;
import org.apache.jackrabbit.core.state.ChildNodeEntry;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NoSuchItemStateException;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.core.value.InternalValue;
import org.apache.jackrabbit.spi.Name;
import org.apache.jackrabbit.spi.Path;
import org.apache.jackrabbit.spi.commons.conversion.NamePathResolver;
import org.apache.jackrabbit.spi.commons.name.NameConstants;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Creates a lucene <code>Document</code> object from a {@link javax.jcr.Node}.
 */
public class NodeIndexer {

    /**
     * The logger instance for this class.
     */
    private static final Logger log = LoggerFactory.getLogger(NodeIndexer.class);

    /**
     * The default boost for a lucene field: 1.0f.
     */
    protected static final float DEFAULT_BOOST = IndexingConfiguration.DEFAULT_BOOST;

    /**
     * The <code>NodeState</code> of the node to index
     */
    protected final NodeState node;

    /**
     * The persistent item state provider
     */
    protected final ItemStateManager stateProvider;

    /**
     * Namespace mappings to use for indexing. This is the internal
     * namespace mapping.
     */
    protected final NamespaceMappings mappings;

    /**
     * Name and Path resolver.
     */
    protected final NamePathResolver resolver;

    /**
     * Background task executor used for full text extraction.
     */
    private final Executor executor;

    /**
     * Parser used for extracting text content from binary properties
     * for full text indexing.
     */
    private final Parser parser;

    /**
     * The media types supported by the parser used.
     */
    private Set<MediaType> supportedMediaTypes;

    /**
     * The indexing configuration or <code>null</code> if none is available.
     */
    protected IndexingConfiguration indexingConfig;

    /**
     * If set to <code>true</code> the fulltext field is stored and and a term
     * vector is created with offset information.
     */
    protected boolean supportHighlighting = false;

    /**
     * Indicates index format for this node indexer.
     */
    protected IndexFormatVersion indexFormatVersion = IndexFormatVersion.V1;

    /**
     * List of {@link FieldNames#FULLTEXT} fields which should not be used in
     * an excerpt.
     */
    protected List<Fieldable> doNotUseInExcerpt = new ArrayList<Fieldable>();

    /**
     * The maximum number of characters to extract from binaries.
     */
    private int maxExtractLength = Integer.MAX_VALUE;

    /**
     * Creates a new node indexer.
     *
     * @param node          the node state to index.
     * @param stateProvider the persistent item state manager to retrieve properties.
     * @param mappings      internal namespace mappings.
     * @param executor      background task executor for text extraction
     * @param parser        parser for binary properties
     */
    public NodeIndexer(
            NodeState node, ItemStateManager stateProvider,
            NamespaceMappings mappings, Executor executor, Parser parser) {
        this.node = node;
        this.stateProvider = stateProvider;
        this.mappings = mappings;
        this.resolver = NamePathResolverImpl.create(mappings);
        this.executor = executor;
        this.parser = parser;
    }

    /**
     * Returns the <code>NodeId</code> of the indexed node.
     * @return the <code>NodeId</code> of the indexed node.
     */
    public NodeId getNodeId() {
        return node.getNodeId();
    }

    /**
     * If set to <code>true</code> additional information is stored in the index
     * to support highlighting using the rep:excerpt pseudo property.
     *
     * @param b <code>true</code> to enable highlighting support.
     */
    public void setSupportHighlighting(boolean b) {
        supportHighlighting = b;
    }

    /**
     * Sets the index format version
     *
     * @param indexFormatVersion the index format version
     */
    public void setIndexFormatVersion(IndexFormatVersion indexFormatVersion) {
        this.indexFormatVersion = indexFormatVersion;
    }

    /**
     * Sets the indexing configuration for this node indexer.
     *
     * @param config the indexing configuration.
     */
    public void setIndexingConfiguration(IndexingConfiguration config) {
        this.indexingConfig = config;
    }

    /**
     * Returns the maximum number of characters to extract from binaries.
     *
     * @return maximum extraction length
     */
    public int getMaxExtractLength() {
        return maxExtractLength;
    }

    /**
     * Sets the maximum number of characters to extract from binaries.
     *
     * @param length maximum extraction length
     */
    public void setMaxExtractLength(int length) {
        this.maxExtractLength = length;
    }

    /**
     * Creates a lucene Document.
     *
     * @return the lucene Document with the index layout.
     * @throws RepositoryException if an error occurs while reading property
     *                             values from the <code>ItemStateProvider</code>.
     */
    public Document createDoc() throws RepositoryException {
        doNotUseInExcerpt.clear();
        Document doc = new Document();

        doc.setBoost(getNodeBoost());

        // special fields
        // UUID
        doc.add(new IDField(node.getNodeId()));
        try {
            // parent UUID
            if (node.getParentId() == null) {
                // root node
                Field parent = new Field(FieldNames.PARENT, false, "",
                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS,
                        Field.TermVector.NO);
                parent.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
                doc.add(parent);
                addNodeName(doc, "", "");
            } else if (node.getSharedSet().isEmpty()) {
                addParentChildRelation(doc, node.getParentId());
            } else {
                // shareable node
                for (NodeId id : node.getSharedSet()) {
                    addParentChildRelation(doc, id);
                }
                // mark shareable nodes
                doc.add(new Field(FieldNames.SHAREABLE_NODE, false, "",
                        Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                        Field.TermVector.NO));
            }
        } catch (NoSuchItemStateException e) {
            throwRepositoryException(e);
        } catch (ItemStateException e) {
            throwRepositoryException(e);
        } catch (NamespaceException e) {
            // will never happen, because this.mappings will dynamically add
            // unknown uri<->prefix mappings
        }

        Set<Name> props = node.getPropertyNames();
        for (Name propName : props) {
            if (isIndexed(propName)) {
                PropertyId id = new PropertyId(node.getNodeId(), propName);
                try {
                    PropertyState propState =
                            (PropertyState) stateProvider.getItemState(id);

                    // add each property to the _PROPERTIES_SET for searching
                    // beginning with V2
                    if (indexFormatVersion.getVersion() >= IndexFormatVersion.V2.getVersion()) {
                        addPropertyName(doc, propState.getName());
                    }

                    InternalValue[] values = propState.getValues();
                    for (InternalValue value : values) {
                        addValue(doc, value, propState.getName());
                    }

                    if (values.length > 1) {
                        // real multi-valued
                        addMVPName(doc, propState.getName());
                    }
                } catch (NoSuchItemStateException e) {
                    throwRepositoryException(e);
                } catch (ItemStateException e) {
                    throwRepositoryException(e);
                }
            }
        }

        // now add fields that are not used in excerpt (must go at the end)
        for (Fieldable field : doNotUseInExcerpt) {
            doc.add(field);
        }
        return doc;
    }

    /**
     * Wraps the exception <code>e</code> into a <code>RepositoryException</code>
     * and throws the created exception.
     *
     * @param e the base exception.
     */
    protected void throwRepositoryException(Exception e)
            throws RepositoryException {
        String msg = "Error while indexing node: " + node.getNodeId() + " of "
            + "type: " + node.getNodeTypeName();
        throw new RepositoryException(msg, e);
    }

    /**
     * Adds a {@link FieldNames#MVP} field to <code>doc</code> with the resolved
     * <code>name</code> using the internal search index namespace mapping.
     *
     * @param doc  the lucene document.
     * @param name the name of the multi-value property.
     */
    protected void addMVPName(Document doc, Name name) {
        try {
            String propName = resolver.getJCRName(name);
            doc.add(new Field(FieldNames.MVP, false, propName, Field.Store.NO,
                    Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
        } catch (NamespaceException e) {
            // will never happen, prefixes are created dynamically
        }
    }

    /**
     * Adds a value to the lucene Document.
     *
     * @param doc   the document.
     * @param value the internal jackrabbit value.
     * @param name  the name of the property.
     */
    protected void addValue(Document doc, InternalValue value, Name name) throws RepositoryException {
        String fieldName = name.getLocalName();
        try {
            fieldName = resolver.getJCRName(name);
        } catch (NamespaceException e) {
            // will never happen
        }
        switch (value.getType()) {
            case PropertyType.BINARY:
                addBinaryValue(doc, fieldName, value);
                break;
            case PropertyType.BOOLEAN:
                addBooleanValue(doc, fieldName, value.getBoolean());
                break;
            case PropertyType.DATE:
                addCalendarValue(doc, fieldName, value.getDate());
                break;
            case PropertyType.DOUBLE:
                addDoubleValue(doc, fieldName, value.getDouble());
                break;
            case PropertyType.LONG:
                addLongValue(doc, fieldName, value.getLong());
                break;
            case PropertyType.REFERENCE:
                addReferenceValue(doc, fieldName, value.getNodeId(), false);
                break;
            case PropertyType.WEAKREFERENCE:
                addReferenceValue(doc, fieldName, value.getNodeId(), true);
                break;
            case PropertyType.PATH:
                addPathValue(doc, fieldName, value.getPath());
                break;
            case PropertyType.URI:
                addURIValue(doc, fieldName, value.getURI());
                break;
            case PropertyType.STRING:
                // never fulltext index jcr:uuid String
                if (name.equals(NameConstants.JCR_UUID)) {
                    addStringValue(doc, fieldName, value.getString(),
                            false, false, DEFAULT_BOOST, true);
                } else {
                    addStringValue(doc, fieldName, value.getString(),
                            true, isIncludedInNodeIndex(name),
                            getPropertyBoost(name), useInExcerpt(name));
                }
                break;
            case PropertyType.NAME:
                addNameValue(doc, fieldName, value.getName());
                break;
            case PropertyType.DECIMAL:
                addDecimalValue(doc, fieldName, value.getDecimal());
                break;
            default:
                throw new IllegalArgumentException("illegal internal value type: " + value.getType());
        }
        addValueProperty(doc, value, name, fieldName);
    }

    /**
     * Adds a property related value to the lucene Document. <br>
     *
     * Like <code>length</code> for indexed fields.
     *
     * @param doc
     *            the document.
     * @param value
     *            the internal jackrabbit value.
     * @param name
     *            the name of the property.
     */
    protected void addValueProperty(Document doc, InternalValue value,
            Name name, String fieldName) throws RepositoryException {
        // add length
        if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion()) {
            addLength(doc, fieldName, value);
        }
    }

    /**
     * Adds the property name to the lucene _:PROPERTIES_SET field.
     *
     * @param doc  the document.
     * @param name the name of the property.
     */
    protected void addPropertyName(Document doc, Name name) {
        String fieldName = name.getLocalName();
        try {
            fieldName = resolver.getJCRName(name);
        } catch (NamespaceException e) {
            // will never happen
        }
        doc.add(new Field(FieldNames.PROPERTIES_SET, false, fieldName,
                Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                Field.TermVector.NO));
    }

    /**
     * Adds the binary value to the document as the named field.
     * <p>
     * This implementation checks if this {@link #node} is of type nt:resource
     * and if that is the case, tries to extract text from the binary property
     * using the {@link #parser}.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addBinaryValue(Document doc,
                                  String fieldName,
                                  InternalValue internalValue) {
        // 'check' if node is of type nt:resource
        try {
            String jcrData = mappings.getPrefix(Name.NS_JCR_URI) + ":data";
            if (!jcrData.equals(fieldName)) {
                // don't know how to index
                return;
            }

            InternalValue type = getValue(NameConstants.JCR_MIMETYPE);
            if (type != null && isSupportedMediaType(type.getString())) {
                Metadata metadata = new Metadata();
                metadata.set(Metadata.CONTENT_TYPE, type.getString());

                // jcr:encoding is not mandatory
                InternalValue encoding = getValue(NameConstants.JCR_ENCODING);
                if (encoding != null) {
                    metadata.set(
                            Metadata.CONTENT_ENCODING, encoding.getString());
                }

                doc.add(createFulltextField(internalValue, metadata, false));
            }
        } catch (Throwable t) {
            // TODO: How to recover from a transient indexing failure?
            log.warn("Exception while indexing binary property", t);
        }
    }

    /**
     * Utility method that extracts the first value of the named property
     * of the current node. Returns <code>null</code> if the property does
     * not exist or contains no values.
     *
     * @param name property name
     * @return value of the named property, or <code>null</code>
     * @throws ItemStateException if the property can not be accessed
     */
    protected InternalValue getValue(Name name) throws ItemStateException {
        try {
            PropertyId id = new PropertyId(node.getNodeId(), name);
            PropertyState property =
                (PropertyState) stateProvider.getItemState(id);
            InternalValue[] values = property.getValues();
            if (values.length > 0) {
                return values[0];
            } else {
                return null;
            }
        } catch (NoSuchItemStateException e) {
            return null;
        }
    }

    /**
     * Adds the string representation of the boolean value to the document as
     * the named field.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addBooleanValue(Document doc, String fieldName, Object internalValue) {
        doc.add(createFieldWithoutNorms(fieldName, internalValue.toString(),
                PropertyType.BOOLEAN));
    }

    /**
     * Creates a field of name <code>fieldName</code> with the value of <code>
     * internalValue</code>. The created field is indexed without norms.
     *
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     * @param propertyType  the property type.
     */
    protected Field createFieldWithoutNorms(String fieldName,
                                            String internalValue,
                                            int propertyType) {
        if (indexFormatVersion.getVersion()
                >= IndexFormatVersion.V3.getVersion()) {
            Field field = new Field(FieldNames.PROPERTIES,
                    new SingletonTokenStream(
                            FieldNames.createNamedValue(fieldName, internalValue),
                            propertyType)
                    );
            field.setOmitNorms(true);
            return field;
        } else {
            return new Field(FieldNames.PROPERTIES, false,
                    FieldNames.createNamedValue(fieldName, internalValue),
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                    Field.TermVector.NO);
        }
    }

    /**
     * Adds the calendar value to the document as the named field. The calendar
     * value is converted to an indexable string value using the
     * {@link DateField} class.
     *
     * @param doc
     *            The document to which to add the field
     * @param fieldName
     *            The name of the field to add
     * @param internalValue
     *            The value for the field to add to the document.
     */
    protected void addCalendarValue(Document doc, String fieldName, Calendar internalValue) {
        try {
            doc.add(createFieldWithoutNorms(fieldName,
                    DateField.timeToString(internalValue.getTimeInMillis()),
                    PropertyType.DATE));
        } catch (IllegalArgumentException e) {
            log.warn("'{}' is outside of supported date value range.",
                    internalValue);
        }
    }

    /**
     * Adds the double value to the document as the named field. The double
     * value is converted to an indexable string value using the
     * {@link DoubleField} class.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addDoubleValue(Document doc, String fieldName, double internalValue) {
        doc.add(createFieldWithoutNorms(fieldName, DoubleField.doubleToString(internalValue),
                PropertyType.DOUBLE));
    }

    /**
     * Adds the long value to the document as the named field. The long
     * value is converted to an indexable string value using the {@link LongField}
     * class.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addLongValue(Document doc, String fieldName, long internalValue) {
        doc.add(createFieldWithoutNorms(fieldName, LongField.longToString(internalValue),
                PropertyType.LONG));
    }

    /**
     * Adds the long value to the document as the named field. The long
     * value is converted to an indexable string value using the {@link LongField}
     * class.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addDecimalValue(Document doc, String fieldName, BigDecimal internalValue) {
        doc.add(createFieldWithoutNorms(fieldName, DecimalField.decimalToString(internalValue),
                PropertyType.DECIMAL));
    }

    /**
     * Adds the reference value to the document as the named field. The value's
     * string representation is added as the reference data. Additionally the
     * reference data is stored in the index. As of Jackrabbit 2.0 this method
     * also adds the reference UUID as a {@link FieldNames#WEAK_REFS} field
     * to the index if it is a weak reference.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     * @param weak          Flag indicating whether it's a WEAKREFERENCE (true) or a REFERENCE (flase)
     */
    protected void addReferenceValue(Document doc, String fieldName, NodeId internalValue, boolean weak) {
        String uuid = internalValue.toString();
        doc.add(createFieldWithoutNorms(fieldName, uuid,
                weak ? PropertyType.WEAKREFERENCE : PropertyType.REFERENCE));
        doc.add(new Field(FieldNames.PROPERTIES, false, FieldNames
                .createNamedValue(fieldName, uuid), Field.Store.YES,
                Field.Index.NO, Field.TermVector.NO));
        if (weak) {
            doc.add(new Field(FieldNames.WEAK_REFS, false, uuid,
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                    Field.TermVector.NO));
        }
    }

    /**
     * Adds the path value to the document as the named field. The path
     * value is converted to an indexable string value using the name space
     * mappings with which this class has been created.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addPathValue(Document doc, String fieldName, Path internalValue) {
        String pathString = internalValue.toString();
        try {
            pathString = resolver.getJCRPath(internalValue);
        } catch (NamespaceException e) {
            // will never happen
        }
        doc.add(createFieldWithoutNorms(fieldName, pathString,
                PropertyType.PATH));
    }

    /**
     * Adds the uri value to the document as the named field.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addURIValue(Document doc, String fieldName, URI internalValue) {
        doc.add(createFieldWithoutNorms(fieldName, internalValue.toString(),
                PropertyType.URI));
    }

    /**
     * Adds the string value to the document both as the named field and for
     * full text indexing.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     * @deprecated Use {@link #addStringValue(Document, String, String, boolean)
     *             addStringValue(Document, String, Object, boolean)} instead.
     */
    protected void addStringValue(Document doc, String fieldName, String internalValue) {
        addStringValue(doc, fieldName, internalValue, true, true, DEFAULT_BOOST, true);
    }

    /**
     * Adds the string value to the document both as the named field and
     * optionally for full text indexing if <code>tokenized</code> is
     * <code>true</code>.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     * @param tokenized     If <code>true</code> the string is also tokenized
     *                      and fulltext indexed.
     */
    protected void addStringValue(Document doc, String fieldName,
                                  String internalValue, boolean tokenized) {
        addStringValue(doc, fieldName, internalValue, tokenized, true, DEFAULT_BOOST, true);
    }

    /**
     * Adds the string value to the document both as the named field and
     * optionally for full text indexing if <code>tokenized</code> is
     * <code>true</code>.
     *
     * @param doc                The document to which to add the field
     * @param fieldName          The name of the field to add
     * @param internalValue      The value for the field to add to the
     *                           document.
     * @param tokenized          If <code>true</code> the string is also
     *                           tokenized and fulltext indexed.
     * @param includeInNodeIndex If <code>true</code> the string is also
     *                           tokenized and added to the node scope fulltext
     *                           index.
     * @param boost              the boost value for this string field.
     * @deprecated use {@link #addStringValue(Document, String, String, boolean, boolean, float, boolean)} instead.
     */
    protected void addStringValue(Document doc, String fieldName,
                                  String internalValue, boolean tokenized,
                                  boolean includeInNodeIndex, float boost) {
        addStringValue(doc, fieldName, internalValue, tokenized, includeInNodeIndex, boost, true);
    }

    /**
     * Adds the string value to the document both as the named field and
     * optionally for full text indexing if <code>tokenized</code> is
     * <code>true</code>.
     *
     * @param doc                The document to which to add the field
     * @param fieldName          The name of the field to add
     * @param internalValue      The value for the field to add to the
     *                           document.
     * @param tokenized          If <code>true</code> the string is also
     *                           tokenized and fulltext indexed.
     * @param includeInNodeIndex If <code>true</code> the string is also
     *                           tokenized and added to the node scope fulltext
     *                           index.
     * @param boost              the boost value for this string field.
     * @param useInExcerpt       If <code>true</code> the string may show up in
     *                           an excerpt.
     */
    protected void addStringValue(Document doc, String fieldName,
                                  String internalValue, boolean tokenized,
                                  boolean includeInNodeIndex, float boost,
                                  boolean useInExcerpt) {

        // simple String
        doc.add(createFieldWithoutNorms(fieldName, internalValue,
                PropertyType.STRING));
        if (tokenized) {
            if (internalValue.length() == 0) {
                return;
            }
            // create fulltext index on property
            int idx = fieldName.indexOf(':');
            fieldName = fieldName.substring(0, idx + 1)
                    + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);
            boolean hasNorms = boost != DEFAULT_BOOST;
            Field.Index indexType = hasNorms ? Field.Index.ANALYZED
                    : Field.Index.ANALYZED_NO_NORMS;
            Field f = new Field(fieldName, true, internalValue, Field.Store.NO,
                    indexType, Field.TermVector.NO);
            f.setBoost(boost);
            doc.add(f);

            if (includeInNodeIndex) {
                // also create fulltext index of this value
                boolean store = supportHighlighting && useInExcerpt;
                f = createFulltextField(internalValue, store, supportHighlighting, hasNorms);
                if (useInExcerpt) {
                    doc.add(f);
                } else {
                    doNotUseInExcerpt.add(f);
                }
            }
        }
    }

    /**
     * Adds the name value to the document as the named field. The name
     * value is converted to an indexable string treating the internal value
     * as a <code>Name</code> and mapping the name space using the name space
     * mappings with which this class has been created.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addNameValue(Document doc, String fieldName, Name internalValue) {
        try {
            String normValue = mappings.getPrefix(internalValue.getNamespaceURI())
                    + ":" + internalValue.getLocalName();
            doc.add(createFieldWithoutNorms(fieldName, normValue,
                    PropertyType.NAME));
        } catch (NamespaceException e) {
            // will never happen
        }
    }

    /**
     * Creates a fulltext field for the string <code>value</code>.
     *
     * @param value the string value.
     * @return a lucene field.
     * @deprecated use {@link #createFulltextField(String, boolean, boolean, boolean)} instead.
     */
    protected Field createFulltextField(String value) {
        return createFulltextField(value, supportHighlighting, supportHighlighting);
    }

    /**
     * Creates a fulltext field for the string <code>value</code>.
     * 
     * @param value the string value.
     * @param store if the value of the field should be stored.
     * @param withOffsets if a term vector with offsets should be stored.
     * @return a lucene field.
     * @deprecated use {@link #createFulltextField(String, boolean, boolean, boolean)} instead.
     */
    protected Field createFulltextField(String value,
                                        boolean store,
                                        boolean withOffsets) {
        return createFulltextField(value, store, withOffsets, true);
    }

    /**
     * Creates a fulltext field for the string <code>value</code>.
     *
     * @param value the string value.
     * @param store if the value of the field should be stored.
     * @param withOffsets if a term vector with offsets should be stored.
     * @param withNorms if norm information should be added for this value
     * @return a lucene field.
     */
    protected Field createFulltextField(String value,
                                        boolean store,
                                        boolean withOffsets,
                                        boolean withNorms) {
        Field.TermVector tv;
        if (withOffsets) {
            tv = Field.TermVector.WITH_OFFSETS;
        } else {
            tv = Field.TermVector.NO;
        }
        Field.Index index;
        if (withNorms) {
            index = Field.Index.ANALYZED;
        } else {
            index = Field.Index.ANALYZED_NO_NORMS;
        }
        if (store) {
            // We would be able to store the field compressed or not depending
            // on a criterion but then we could not determine later is this field
            // has been compressed or not, so we choose to store it uncompressed
            return new Field(FieldNames.FULLTEXT, false, value, Field.Store.YES,
                    index, tv);
        } else {
            return new Field(FieldNames.FULLTEXT, false, value,
                    Field.Store.NO, index, tv);
        }
    }

    /**
     * Creates a fulltext field for the reader <code>value</code>.
     *
     * @param value the binary value
     * @param metadata document metatadata
     * @return a lucene field.
     * @deprecated use {@link #createFulltextField(InternalValue, Metadata, boolean)} instead.
     */
    protected Fieldable createFulltextField(
            InternalValue value, Metadata metadata) {
        return createFulltextField(value, metadata, true);
    }

    /**
     * Creates a fulltext field for the reader <code>value</code>.
     *
     * @param value the binary value
     * @param metadata document metatadata
     * @param withNorms if norm information should be added for this value
     * @return a lucene field.
     */
    protected Fieldable createFulltextField(
            InternalValue value, Metadata metadata, boolean withNorms) {
        return new LazyTextExtractorField(parser, value, metadata, executor,
                supportHighlighting, getMaxExtractLength(), withNorms);
    }

    /**
     * Returns <code>true</code> if the property with the given name should
     * be indexed. The default is to index all properties unless explicit
     * indexing configuration is specified. The <code>jcr:primaryType</code>
     * and <code>jcr:mixinTypes</code> properties are always indexed for
     * correct node type resolution in queries.
     *
     * @param propertyName name of a property.
     * @return <code>true</code> if the property should be indexed;
     *         <code>false</code> otherwise.
     */
    protected boolean isIndexed(Name propertyName) {
        return indexingConfig == null
                || propertyName.equals(NameConstants.JCR_PRIMARYTYPE)
                || propertyName.equals(NameConstants.JCR_MIXINTYPES)
                || indexingConfig.isIndexed(node, propertyName);
    }

    /**
     * Returns <code>true</code> if the property with the given name should also
     * be added to the node scope index.
     *
     * @param propertyName the name of a property.
     * @return <code>true</code> if it should be added to the node scope index;
     *         <code>false</code> otherwise.
     */
    protected boolean isIncludedInNodeIndex(Name propertyName) {
        if (indexingConfig == null) {
            return true;
        } else {
            return indexingConfig.isIncludedInNodeScopeIndex(node, propertyName);
        }
    }

    /**
     * Returns <code>true</code> if the content of the property with the given
     * name should the used to create an excerpt.
     *
     * @param propertyName the name of a property.
     * @return <code>true</code> if it should be used to create an excerpt;
     *         <code>false</code> otherwise.
     */
    protected boolean useInExcerpt(Name propertyName) {
        if (indexingConfig == null) {
            return true;
        } else {
            return indexingConfig.useInExcerpt(node, propertyName);
        }
    }

    /**
     * Returns <code>true</code> if the provided type is among the types
     * supported by the Tika parser we are using.
     *
     * @param type  the type to check.
     * @return whether the type is supported by the Tika parser we are using.
     */
    protected boolean isSupportedMediaType(final String type) {
        if (supportedMediaTypes == null) {
            supportedMediaTypes = parser.getSupportedTypes(new ParseContext());
        }
        return supportedMediaTypes.contains(MediaType.parse(type));
    }

    /**
     * Returns the boost value for the given property name.
     *
     * @param propertyName the name of a property.
     * @return the boost value for the given property name.
     */
    protected float getPropertyBoost(Name propertyName) {
        if (indexingConfig == null) {
            return DEFAULT_BOOST;
        } else {
            return indexingConfig.getPropertyBoost(node, propertyName);
        }
    }

    /**
     * @return the boost value for this {@link #node} state.
     */
    protected float getNodeBoost() {
        if (indexingConfig == null) {
            return DEFAULT_BOOST;
        } else {
            return indexingConfig.getNodeBoost(node);
        }
    }

    /**
     * Adds a {@link FieldNames#PROPERTY_LENGTHS} field to <code>document</code>
     * with a named length value.
     *
     * @param doc          the lucene document.
     * @param propertyName the property name.
     * @param value        the internal value.
     */
    protected void addLength(Document doc,
                             String propertyName,
                             InternalValue value) {
        long length = Util.getLength(value);
        if (length != -1) {
            doc.add(new Field(FieldNames.PROPERTY_LENGTHS, false, FieldNames
                    .createNamedLength(propertyName, length), Field.Store.NO,
                    Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
        }
    }

    /**
     * Depending on the index format version adds one or two fields to the
     * document for the node name.
     *
     * @param doc the lucene document.
     * @param namespaceURI the namespace URI of the node name.
     * @param localName the local name of the node.
     */
    protected void addNodeName(Document doc,
                               String namespaceURI,
                               String localName) throws NamespaceException {
        String name = mappings.getPrefix(namespaceURI) + ":" + localName;
        doc.add(new Field(FieldNames.LABEL, false, name, Field.Store.NO,
                Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
        // as of version 3, also index combination of namespace URI and local name
        if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion()) {
            doc.add(new Field(FieldNames.NAMESPACE_URI, false, namespaceURI,
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                    Field.TermVector.NO));
            doc.add(new Field(FieldNames.LOCAL_NAME, false, localName,
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                    Field.TermVector.NO));
        }
    }

    /**
     * Adds a parent child relation to the given <code>doc</code>.
     *
     * @param doc      the document.
     * @param parentId the id of the parent node.
     * @throws ItemStateException  if the parent node cannot be read.
     * @throws RepositoryException if the parent node does not have a child node
     *                             entry for the current node.
     */
    protected void addParentChildRelation(Document doc,
                                          NodeId parentId)
            throws ItemStateException, RepositoryException {
        Field parentField = new Field(FieldNames.PARENT, false,
                parentId.toString(), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
        parentField.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
        doc.add(parentField);
        NodeState parent = (NodeState) stateProvider.getItemState(parentId);
        ChildNodeEntry child = parent.getChildNodeEntry(node.getNodeId());
        if (child == null) {
            // this can only happen when jackrabbit
            // is running in a cluster.
            throw new RepositoryException(
                    "Missing child node entry for node with id: "
                    + node.getNodeId());
        }
        Name name = child.getName();
        addNodeName(doc, name.getNamespaceURI(), name.getLocalName());
    }
}