NodeIndexer.java example

Explorer
jcr-master
- jcr-develop
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.services.jcr.impl.core.query.lucene;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.exoplatform.services.document.AdvancedDocumentReader;
import org.exoplatform.services.document.DocumentReadException;
import org.exoplatform.services.document.DocumentReader;
import org.exoplatform.services.document.DocumentReaderService;
import org.exoplatform.services.document.HandlerNotFoundException;
import org.exoplatform.services.jcr.core.ExtendedPropertyType;
import org.exoplatform.services.jcr.dataflow.ItemDataConsumer;
import org.exoplatform.services.jcr.datamodel.InternalQName;
import org.exoplatform.services.jcr.datamodel.ItemType;
import org.exoplatform.services.jcr.datamodel.NodeDataIndexing;
import org.exoplatform.services.jcr.datamodel.PropertyData;
import org.exoplatform.services.jcr.datamodel.QPathEntry;
import org.exoplatform.services.jcr.datamodel.ValueData;
import org.exoplatform.services.jcr.impl.Constants;
import org.exoplatform.services.jcr.impl.core.LocationFactory;
import org.exoplatform.services.jcr.impl.core.itemfilters.ExactQPathEntryFilter;
import org.exoplatform.services.jcr.impl.core.itemfilters.QPathEntryFilter;
import org.exoplatform.services.jcr.impl.dataflow.ValueDataUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import javax.jcr.NamespaceException;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;

/**
 * Creates a lucene <code>Document</code> object from a {@link javax.jcr.Node}.
 */
public class NodeIndexer
{

   /**
    * The logger instance for this class.
    */
   private static final Logger LOG = LoggerFactory.getLogger("exo.jcr.component.core.NodeIndexer");

   /**
    * The default boost for a lucene field: 1.0f.
    */
   protected static final float DEFAULT_BOOST = 1.0f;

   /**
    * The <code>NodeState</code> of the node to index
    */
   protected final NodeDataIndexing node;

   /**
    * The persistent item state provider
    */
   protected final ItemDataConsumer stateProvider;

   /**
    * Namespace mappings to use for indexing. This is the internal
    * namespace mapping.
    */
   protected final NamespaceMappings mappings;

   /**
    * Name and Path resolver.
    */
   protected final LocationFactory resolver;

   /**
    * Content extractor.
    */
   protected final DocumentReaderService extractor;

   /**
    * The indexing configuration or <code>null</code> if none is available.
    */
   protected IndexingConfiguration indexingConfig;

   /**
    * If set to <code>true</code> the fulltext field is stored and and a term
    * vector is created with offset information.
    */
   protected boolean supportHighlighting = false;

   /**
    * Indicates index format for this node indexer.
    */
   protected IndexFormatVersion indexFormatVersion = IndexFormatVersion.V1;

   /**
    * List of {@link FieldNames#FULLTEXT} fields which should not be used in
    * an excerpt.
    */
   protected List<Fieldable> doNotUseInExcerpt = new ArrayList<Fieldable>();

   private int loadBatchingThreshold = -1;

   private boolean loadAllProperties;

   private boolean loadPropertyByName;

   /**
    * Creates a new node indexer.
    *
    * @param node          the node state to index.
    * @param stateProvider the persistent item state manager to retrieve properties.
    * @param mappings      internal namespace mappings.
    * @param extractor     content extractor
    */
   public NodeIndexer(NodeDataIndexing node, ItemDataConsumer stateProvider, NamespaceMappings mappings,
      DocumentReaderService extractor)
   {
      this.node = node;
      this.stateProvider = stateProvider;
      this.mappings = mappings;
      this.resolver = new LocationFactory(mappings);
      this.extractor = extractor;
   }

   /**
    * Returns the <code>NodeId</code> of the indexed node.
    * @return the <code>NodeId</code> of the indexed node.
    */
   public String getNodeId()
   {
      return node.getIdentifier();
   }

   /**
    * If set to <code>true</code> additional information is stored in the index
    * to support highlighting using the rep:excerpt pseudo property.
    *
    * @param b <code>true</code> to enable highlighting support.
    */
   public void setSupportHighlighting(boolean b)
   {
      supportHighlighting = b;
   }

   /**
    * Sets the index format version
    *
    * @param indexFormatVersion the index format version
    */
   public void setIndexFormatVersion(IndexFormatVersion indexFormatVersion)
   {
      this.indexFormatVersion = indexFormatVersion;
   }

   /**
    * Sets the indexing configuration for this node indexer.
    *
    * @param config the indexing configuration.
    */
   public void setIndexingConfiguration(IndexingConfiguration config)
   {
      this.indexingConfig = config;
   }

   /**
    * Sets the threshold from which we decide to load all the properties at the same time
    * instead of loading them individually. This is needed for performance reasons because
    * if you don't have a lot of properties it is faster to get them individually, starting
    * from a given limit it is better to load all the properties to avoid doing too many queries
    * 
    * @param loadBatchingThreshold the value of the threshold.
    */
   public void setLoadBatchingThreshold(int loadBatchingThreshold)
   {
      this.loadBatchingThreshold = loadBatchingThreshold;
   }

   /**
    * Indicates whether or not all the properties should be loaded using the method
    * {@link ItemDataConsumer#getChildPropertiesData(org.exoplatform.services.jcr.datamodel.NodeData)}
    */
   public void setLoadAllProperties(boolean loadAllProperties)
   {
      this.loadAllProperties = loadAllProperties;
   }

   /**
    * Indicates whether a property should be loaded by name or by id when we need to load a property one by one
    */
   public void setLoadPropertyByName(boolean loadPropertyByName)
   {
      this.loadPropertyByName = loadPropertyByName;
   }

   /**
    * Creates a lucene Document.
    *
    * @return the lucene Document with the index layout.
    * @throws RepositoryException if an error occurs while reading property
    *                             values from the <code>ItemStateProvider</code>.
    */
   protected Document createDoc() throws RepositoryException
   {
      doNotUseInExcerpt.clear();
      final Document doc = new Document();

      doc.setBoost(getNodeBoost());

      // special fields
      // UUID
      doc.add(new Field(FieldNames.UUID, node.getIdentifier(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

      if (indexingConfig != null && indexingConfig.isExcluded(node))
      {
         return doc;
      }

      try
      {
         if (node.getParentIdentifier() == null)
         {
            // root node
            doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            addNodeName(doc, "", "");
         }
         else
         {
            addParentChildRelation(doc, node.getParentIdentifier());
         }
      }
      catch (NamespaceException e)
      {
         // will never happen, because this.mappings will dynamically add
         // unknown uri<->prefix mappings
         if (LOG.isTraceEnabled())
         {
            LOG.trace("An exception occurred: " + e.getMessage());
         }
      }

      if (indexFormatVersion.getVersion() >= IndexFormatVersion.V4.getVersion())
      {
         doc.add(new Field(FieldNames.INDEX, Integer.toString(node.getQPath().getIndex()), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));

         StringBuilder path = new StringBuilder(256);
         path.append(node.getParentIdentifier() == null ? "" : node.getParentIdentifier()).append('/')
            .append(node.getQPath().getName().getAsString());
         doc.add(new Field(FieldNames.PATH, path.toString(), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
      }

      Collection<PropertyData> props = node.getChildPropertiesData();
      if (props == null)
      {
         if (loadAllProperties) 
         {
            props = stateProvider.getChildPropertiesData(node);
         }
         else
         {
            props = stateProvider.listChildPropertiesData(node);
            if (loadBatchingThreshold > -1 && props.size() > loadBatchingThreshold)
            {
               // The limit of properties to load individually has been reached so we perform
               // a batch loading
               List<QPathEntryFilter> filters = new ArrayList<QPathEntryFilter>(props.size());
               for (final PropertyData prop : props)
               {
                  filters.add(new ExactQPathEntryFilter(new QPathEntry(prop.getQPath().getName(), 0)));
               }
               // We use stateProvider.getChildPropertiesData(node, filters) instead of stateProvider.getChildPropertiesData(node)
               // because we want to get the properties from the cache if they are available
               props = stateProvider.getChildPropertiesData(node, filters);
            }
         }
      }

      for (final PropertyData prop : props)
      {

         // add each property to the _PROPERTIES_SET for searching
         // beginning with V2
         if (indexFormatVersion.getVersion() >= IndexFormatVersion.V2.getVersion())
         {
            addPropertyName(doc, prop.getQPath().getName());
         }
         addValues(doc, prop);
      }

      // now add fields that are not used in excerpt (must go at the end)
      for (Iterator<Fieldable> it = doNotUseInExcerpt.iterator(); it.hasNext();)
      {
         doc.add(it.next());
      }
      return doc;
   }

   /**
    * Adds a {@link FieldNames#MVP} field to <code>doc</code> with the resolved
    * <code>name</code> using the internal search index namespace mapping.
    *
    * @param doc  the lucene document.
    * @param name the name of the multi-value property.
    * @throws RepositoryException 
    */
   private void addMVPName(Document doc, InternalQName name) throws RepositoryException
   {
      try
      {
         String propName = resolver.createJCRName(name).getAsString();
         doc.add(new Field(FieldNames.MVP, propName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
            Field.TermVector.NO));
      }
      catch (NamespaceException e)
      {
         // will never happen, prefixes are created dynamically
         if (LOG.isTraceEnabled())
         {
            LOG.trace("An exception occurred: " + e.getMessage());
         }
      }
   }

   /**
    * Adds a value to the lucene Document.
    *
    * @param doc   the document.
    * @param value the internal  value.
    * @param name  the name of the property.
    */
   private void addValues(final Document doc, final PropertyData prop) throws RepositoryException
   {
      int propType = prop.getType();
      String fieldName = resolver.createJCRName(prop.getQPath().getName()).getAsString();
      if (propType == PropertyType.BINARY)
      {

         List<ValueData> data = null;
         if (node.getQPath().getName().equals(Constants.JCR_CONTENT))
         {

            // seems nt:file found, try for nt:resource props
            PropertyData pmime = node.getProperty(Constants.JCR_MIMETYPE.getAsString());
            if (pmime == null && !node.containAllProperties())
            {
               pmime =
                  (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_MIMETYPE, 0),
                     ItemType.PROPERTY);
            }

            if (pmime != null && pmime.getValues() != null && !pmime.getValues().isEmpty())
            {
               // ok, have a reader
               // if the prop obtainer from cache it will contains a values,
               // otherwise read prop with values from DM
               PropertyData propData =
                  prop.getValues() != null && !prop.getValues().isEmpty() ? prop : ((PropertyData)stateProvider
                     .getItemData(node, new QPathEntry(Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  if (propData == null || (data = propData.getValues()) == null || data.isEmpty())
                  {
                     if (LOG.isDebugEnabled())
                     {
                        LOG.debug("No value found for the property located at " + prop.getQPath().getAsString());
                     }
                     return;
                  }

                  DocumentReader dreader =
                     extractor.getDocumentReader(ValueDataUtil.getString(pmime.getValues().get(0)));

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null && !node.containAllProperties())
                  {
                     encProp =
                        (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null && encProp.getValues() != null && !encProp.getValues().isEmpty())
                  {
                     // encoding parameter used
                     encoding = ValueDataUtil.getString(encProp.getValues().get(0));
                  }
                  else
                  {
                     if (LOG.isDebugEnabled())
                     {
                        LOG.debug("No encoding found for the node located at " + node.getQPath().getAsString());
                     }
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally 
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e) //NOSONAR
                           {
                              if (LOG.isTraceEnabled())
                              {
                                 LOG.trace("An exception occurred: " + e.getMessage());
                              }
                           }
                        }
                     }
                  }

                  if (data.size() > 1)
                  {
                     // real multi-valued
                     addMVPName(doc, prop.getQPath().getName());
                  }

               }
               catch (DocumentReadException e)
               {
                  if (LOG.isDebugEnabled())
                  {
                     LOG.debug("Cannot extract the full text content of the property " + propData.getQPath().getAsString()
                        + ", propery id '" + propData.getIdentifier() + "' : " + e, e);
                  }
                  else
                  {
                     LOG.warn("Cannot extract the full text content of the property " + propData.getQPath().getAsString()
                        + ", propery id '" + propData.getIdentifier());
                  }
               }
               catch (HandlerNotFoundException e)
               {
                  // no handler - no index
                  if (LOG.isDebugEnabled())
                  {
                     LOG.debug("Can not indexing the document by path " + propData.getQPath().getAsString()
                        + ", propery id '" + propData.getIdentifier() + "' : " + e, e);
                  }
               }
               catch (IOException e)
               {
                  // no data - no index
                  if (LOG.isDebugEnabled())
                  {
                     LOG.debug("An IO exception occurred while trying to extract the full text content of the property " + propData.getQPath().getAsString()
                        + ", propery id '" + propData.getIdentifier() + "' : " + e, e);
                  }
                  else
                  {
                     LOG.warn("An IO exception occurred while trying to extract the full text content of the property " + propData.getQPath().getAsString()
                        + ", propery id '" + propData.getIdentifier());
                  }
               }
               catch (Exception e)
               {
                  if (LOG.isDebugEnabled())
                  {
                     LOG.debug("An exception occurred while trying to extract the full text content of the property " + propData.getQPath().getAsString()
                        + ", propery id '" + propData.getIdentifier() + "' : " + e, e);
                  }
                  else
                  {
                     LOG.warn("An exception occurred while trying to extract the full text content of the property " + propData.getQPath().getAsString()
                        + ", propery id '" + propData.getIdentifier());
                  }
               }
            }
            else
            {
               if (LOG.isDebugEnabled())
               {
                  LOG.debug("no mime type found for the node located at " + node.getQPath().getAsString());
               }
            }
         }
      }
      else
      {
         try
         {
            // if the prop obtainer from cache it will contains a values, otherwise
            // read prop with values from DM
            // We access to the Item by path to avoid having to rebuild the path if needed in case
            // the indexingLoadBatchingThreshold is enabled only otherwise we get it from the id like
            // before
            PropertyData propData =
               prop.getValues() != null && !prop.getValues().isEmpty() ? prop : (PropertyData)(loadPropertyByName
                  ? stateProvider.getItemData(node, new QPathEntry(prop.getQPath().getName(), 0), ItemType.PROPERTY)
                  : stateProvider.getItemData(prop.getIdentifier()));

            List<ValueData> data;
            if (propData == null || (data = propData.getValues()) == null || data.isEmpty())
            {
               if (LOG.isDebugEnabled())
               {
                  LOG.warn("null value found at property " + prop.getQPath().getAsString());
               }
               return;
            }

            InternalQName name = prop.getQPath().getName();
            for (ValueData value : data)
            {
               switch (propType)
               {
                  case PropertyType.BOOLEAN :
                     if (isIndexed(name))
                     {
                        addBooleanValue(doc, fieldName, ValueDataUtil.getBoolean(value));
                     }
                     break;
                  case PropertyType.DATE :
                     if (isIndexed(name))
                     {
                        addCalendarValue(doc, fieldName, ValueDataUtil.getDate(value));
                     }
                     break;
                  case PropertyType.DOUBLE :
                     if (isIndexed(name))
                     {
                        addDoubleValue(doc, fieldName, ValueDataUtil.getDouble(value));
                     }
                     break;
                  case PropertyType.LONG :
                     if (isIndexed(name))
                     {
                        addLongValue(doc, fieldName, ValueDataUtil.getLong(value));
                     }
                     break;
                  case PropertyType.REFERENCE :
                     if (isIndexed(name))
                     {
                        addReferenceValue(doc, fieldName, ValueDataUtil.getString(value));
                     }
                     break;
                  case PropertyType.PATH :
                     if (isIndexed(name))
                     {
                        addPathValue(doc, fieldName,
                           resolver.createJCRPath(ValueDataUtil.getPath(value)).getAsString(false));
                     }
                     break;
                  case PropertyType.STRING :
                     if (isIndexed(name))
                     {
                        // never fulltext index jcr:uuid String
                        if (name.equals(Constants.JCR_UUID))
                        {
                           addStringValue(doc, fieldName, ValueDataUtil.getString(value), false, false, DEFAULT_BOOST,
                              true);
                        }
                        else
                        {
                           addStringValue(doc, fieldName, ValueDataUtil.getString(value), true,
                              isIncludedInNodeIndex(name), getPropertyBoost(name), useInExcerpt(name));
                        }
                     }
                     break;
                  case PropertyType.NAME :
                     // jcr:primaryType and jcr:mixinTypes are required for correct
                     // node type resolution in queries
                     if (isIndexed(name) || name.equals(Constants.JCR_PRIMARYTYPE)
                        || name.equals(Constants.JCR_MIXINTYPES))
                     {
                        addNameValue(doc, fieldName, resolver.createJCRName(ValueDataUtil.getName(value)).getAsString());
                     }
                     break;
                  case ExtendedPropertyType.PERMISSION :
                     break;
                  default :
                     throw new IllegalArgumentException("illegal internal value type " + propType);
               }
               // add length
               // add not planed
               if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion())
               {
                  addLength(doc, fieldName, value, propType);
               }
            }
            if (data.size() > 1)
            {
               // real multi-valued
               addMVPName(doc, prop.getQPath().getName());
            }
         }
         catch (RepositoryException e)
         {
            LOG.error("Index of property value error. " + prop.getQPath().getAsString() + ".", e);
            throw new RepositoryException("Index of property value error. " + prop.getQPath().getAsString() + ". " + e,
               e);
         }
      }
   }

   /**
    * Adds the property name to the lucene _:PROPERTIES_SET field.
    *
    * @param doc  the document.
    * @param name the name of the property.
   * @throws RepositoryException 
    */
   private void addPropertyName(Document doc, InternalQName name) throws RepositoryException
   {
      String fieldName = name.getName();
      try
      {
         fieldName = resolver.createJCRName(name).getAsString();
      }
      catch (NamespaceException e)
      {
         // will never happen
         if (LOG.isTraceEnabled())
         {
            LOG.trace("An exception occurred: " + e.getMessage());
         }
      }
      doc.add(new Field(FieldNames.PROPERTIES_SET, fieldName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
   }

   /**
    * Adds the string representation of the boolean value to the document as
    * the named field.
    *
    * @param doc           The document to which to add the field
    * @param fieldName     The name of the field to add
    * @param internalValue The value for the field to add to the document.
    */
   protected void addBooleanValue(Document doc, String fieldName, Object internalValue)
   {
      doc.add(createFieldWithoutNorms(fieldName, internalValue.toString(), PropertyType.BOOLEAN));
   }

   /**
    * Creates a field of name <code>fieldName</code> with the value of <code>
    * internalValue</code>. The created field is indexed without norms.
    *
    * @param fieldName     The name of the field to add
    * @param internalValue The value for the field to add to the document.
    * @param propertyType  the property type.
    */
   protected Field createFieldWithoutNorms(String fieldName, String internalValue, int propertyType)
   {
      if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion())
      {
         Field field =
            new Field(FieldNames.PROPERTIES, new SingletonTokenStream(FieldNames.createNamedValue(fieldName,
               internalValue), propertyType));
         field.setOmitNorms(true);
         return field;
      }
      else
      {
         return new Field(FieldNames.PROPERTIES, FieldNames.createNamedValue(fieldName, internalValue), Field.Store.NO,
            Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
      }
   }

   /**
    * Adds the calendar value to the document as the named field. The calendar
    * value is converted to an indexable string value using the
    * {@link DateField} class.
    *
    * @param doc
    *            The document to which to add the field
    * @param fieldName
    *            The name of the field to add
    * @param internalValue
    *            The value for the field to add to the document.
    */
   protected void addCalendarValue(Document doc, String fieldName, Object internalValue)
   {
      Calendar value = (Calendar)internalValue;
      long millis = value.getTimeInMillis();
      try
      {
         doc.add(createFieldWithoutNorms(fieldName, DateField.timeToString(millis), PropertyType.DATE));
      }
      catch (IllegalArgumentException e)
      {
         LOG.warn("'{}' is outside of supported date value range.", new Date(value.getTimeInMillis()));
      }
   }

   /**
    * Adds the double value to the document as the named field. The double
    * value is converted to an indexable string value using the
    * {@link DoubleField} class.
    *
    * @param doc           The document to which to add the field
    * @param fieldName     The name of the field to add
    * @param internalValue The value for the field to add to the document.
    */
   protected void addDoubleValue(Document doc, String fieldName, Object internalValue)
   {
      double doubleVal = ((Double)internalValue).doubleValue();
      doc.add(createFieldWithoutNorms(fieldName, DoubleField.doubleToString(doubleVal), PropertyType.DOUBLE));
   }

   /**
    * Adds the long value to the document as the named field. The long
    * value is converted to an indexable string value using the {@link LongField}
    * class.
    *
    * @param doc           The document to which to add the field
    * @param fieldName     The name of the field to add
    * @param internalValue The value for the field to add to the document.
    */
   protected void addLongValue(Document doc, String fieldName, Object internalValue)
   {
      long longVal = ((Long)internalValue).longValue();
      doc.add(createFieldWithoutNorms(fieldName, LongField.longToString(longVal), PropertyType.LONG));
   }

   /**
    * Adds the reference value to the document as the named field. The value's
    * string representation is added as the reference data. Additionally the
    * reference data is stored in the index.
    *
    * @param doc           The document to which to add the field
    * @param fieldName     The name of the field to add
    * @param internalValue The value for the field to add to the document.
    */
   protected void addReferenceValue(Document doc, String fieldName, Object internalValue)
   {
      String uuid = internalValue.toString();
      doc.add(createFieldWithoutNorms(fieldName, uuid, PropertyType.REFERENCE));
      doc.add(new Field(FieldNames.PROPERTIES, FieldNames.createNamedValue(fieldName, uuid), Field.Store.YES,
         Field.Index.NO, Field.TermVector.NO));
   }

   /**
    * Adds the path value to the document as the named field. The path
    * value is converted to an indexable string value using the name space
    * mappings with which this class has been created.
    *
    * @param doc           The document to which to add the field
    * @param fieldName     The name of the field to add
    * @param pathString    The value for the field to add to the document.
    */
   protected void addPathValue(Document doc, String fieldName, Object pathString)
   {

      doc.add(createFieldWithoutNorms(fieldName, pathString.toString(), PropertyType.PATH));
   }

   /**
    * Adds the string value to the document both as the named field and
    * optionally for full text indexing if <code>tokenized</code> is
    * <code>true</code>.
    *
    * @param doc           The document to which to add the field
    * @param fieldName     The name of the field to add
    * @param internalValue The value for the field to add to the document.
    * @param tokenized     If <code>true</code> the string is also tokenized
    *                      and fulltext indexed.
    */
   protected void addStringValue(Document doc, String fieldName, Object internalValue, boolean tokenized)
   {
      addStringValue(doc, fieldName, internalValue, tokenized, true, DEFAULT_BOOST, true);
   }

   /**
    * Adds the string value to the document both as the named field and
    * optionally for full text indexing if <code>tokenized</code> is
    * <code>true</code>.
    *
    * @param doc                The document to which to add the field
    * @param fieldName          The name of the field to add
    * @param internalValue      The value for the field to add to the
    *                           document.
    * @param tokenized          If <code>true</code> the string is also
    *                           tokenized and fulltext indexed.
    * @param includeInNodeIndex If <code>true</code> the string is also
    *                           tokenized and added to the node scope fulltext
    *                           index.
    * @param boost              the boost value for this string field.
    * @param useInExcerpt       If <code>true</code> the string may show up in
    *                           an excerpt.
    */
   protected void addStringValue(Document doc, String fieldName, Object internalValue, boolean tokenized,
      boolean includeInNodeIndex, float boost, boolean useInExcerpt)
   {

      // simple String
      String stringValue = (String)internalValue;
      doc.add(createFieldWithoutNorms(fieldName, stringValue, PropertyType.STRING));
      if (tokenized)
      {
         if (stringValue.length() == 0)
         {
            return;
         }
         // create fulltext index on property
         int idx = fieldName.indexOf(':');
         fieldName = fieldName.substring(0, idx + 1) + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);
         Field f = new Field(fieldName, stringValue, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
         f.setBoost(boost);
         doc.add(f);

         if (includeInNodeIndex)
         {
            // also create fulltext index of this value
            boolean store = supportHighlighting && useInExcerpt;
            f = createFulltextField(stringValue, store, supportHighlighting);
            if (useInExcerpt)
            {
               doc.add(f);
            }
            else
            {
               doNotUseInExcerpt.add(f);
            }
         }
      }
   }

   /**
    * Adds the name value to the document as the named field. The name
    * value is converted to an indexable string treating the internal value
    * as a qualified name and mapping the name space using the name space
    * mappings with which this class has been created.
    *
    * @param doc           The document to which to add the field
    * @param fieldName     The name of the field to add
    * @param internalValue The value for the field to add to the document.
    */
   protected void addNameValue(Document doc, String fieldName, Object internalValue)
   {
      doc.add(createFieldWithoutNorms(fieldName, internalValue.toString(), PropertyType.NAME));
   }

   /**
    * Creates a fulltext field for the string <code>value</code>.
    *
    * @param value the string value.
    * @param store if the value of the field should be stored.
    * @param withOffsets if a term vector with offsets should be stored.
    * @return a lucene field.
    */
   protected Field createFulltextField(String value, boolean store, boolean withOffsets)
   {
      return new Field(FieldNames.FULLTEXT, value, store ? Field.Store.YES : Field.Store.NO, Field.Index.ANALYZED,
         withOffsets ? Field.TermVector.WITH_OFFSETS : Field.TermVector.NO);
   }

   /**
    * Creates a fulltext field for the reader <code>value</code>.
    *
    * @param value the reader value.
    * @return a lucene field.
    */
   protected Fieldable createFulltextField(Reader value)
   {
      if (supportHighlighting)
      {
         return new TextFieldExtractor(FieldNames.FULLTEXT, value, true, true);
      }
      else
      {
         return new TextFieldExtractor(FieldNames.FULLTEXT, value, false, false);
      }
   }

   /**
    * Returns <code>true</code> if the property with the given name should be
    * indexed.
    *
    * @param propertyName name of a property.
    * @return <code>true</code> if the property should be fulltext indexed;
    *         <code>false</code> otherwise.
    */
   protected boolean isIndexed(InternalQName propertyName)
   {
      if (indexingConfig == null)
      {
         return true;
      }
      else
      {
         return indexingConfig.isIndexed(node, propertyName);
      }
   }

   /**
    * Returns <code>true</code> if the property with the given name should also
    * be added to the node scope index.
    *
    * @param propertyName the name of a property.
    * @return <code>true</code> if it should be added to the node scope index;
    *         <code>false</code> otherwise.
    */
   protected boolean isIncludedInNodeIndex(InternalQName propertyName)
   {
      if (indexingConfig == null)
      {
         return true;
      }
      else
      {
         return indexingConfig.isIncludedInNodeScopeIndex(node, propertyName);
      }
   }

   /**
    * Returns <code>true</code> if the content of the property with the given
    * name should the used to create an excerpt.
    *
    * @param propertyName the name of a property.
    * @return <code>true</code> if it should be used to create an excerpt;
    *         <code>false</code> otherwise.
    */
   protected boolean useInExcerpt(InternalQName propertyName)
   {
      if (indexingConfig == null)
      {
         return true;
      }
      else
      {
         return indexingConfig.useInExcerpt(node, propertyName);
      }
   }

   /**
    * Returns the boost value for the given property name.
    *
    * @param propertyName the name of a property.
    * @return the boost value for the given property name.
    */
   protected float getPropertyBoost(InternalQName propertyName)
   {
      if (indexingConfig == null)
      {
         return DEFAULT_BOOST;
      }
      else
      {
         return indexingConfig.getPropertyBoost(node, propertyName);
      }
   }

   /**
    * @return the boost value for this {@link #node} state.
    */
   protected float getNodeBoost()
   {
      if (indexingConfig == null)
      {
         return DEFAULT_BOOST;
      }
      else
      {
         return indexingConfig.getNodeBoost(node);
      }
   }

   /**
    * Adds a {@link FieldNames#PROPERTY_LENGTHS} field to <code>document</code>
    * with a named length value.
    *
    * @param doc          the lucene document.
    * @param propertyName the property name.
    * @param value        the internal value.
   * @param propType 
    */
   protected void addLength(Document doc, String propertyName, ValueData value, int propType)
   {
      long length = Util.getLength(value, propType);
      if (length != -1)
      {
         doc.add(new Field(FieldNames.PROPERTY_LENGTHS, FieldNames.createNamedLength(propertyName, length),
            Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
      }
   }

   /**
    * Depending on the index format version adds one or two fields to the
    * document for the node name.
    *
    * @param doc the lucene document.
    * @param namespaceURI the namespace URI of the node name.
    * @param localName the local name of the node.
   * @throws RepositoryException 
    */
   protected void addNodeName(Document doc, String namespaceURI, String localName) throws RepositoryException
   {
      String name = mappings.getNamespacePrefixByURI(namespaceURI) + ":" + localName;
      doc.add(new Field(FieldNames.LABEL, name, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
      // as of version 3, also index combination of namespace URI and local name
      if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion())
      {
         doc.add(new Field(FieldNames.NAMESPACE_URI, namespaceURI, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
         doc.add(new Field(FieldNames.LOCAL_NAME, localName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
      }
   }

   /**
    * Adds a parent child relation to the given <code>doc</code>.
    *
    * @param doc      the document.
    * @param parentId the id of the parent node.
    * @throws RepositoryException if the parent node does not have a child node
    *                             entry for the current node.
    */
   protected void addParentChildRelation(Document doc, String parentId) throws RepositoryException
   {
      doc.add(new Field(FieldNames.PARENT, parentId, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS,
         Field.TermVector.NO));
      //        NodeState parent = (NodeState) stateProvider.getItemState(parentId);
      //        ChildNodeEntry child = parent.getChildNodeEntry(node.getNodeId());
      //        if (child == null) {
      //            // this can only happen when jackrabbit
      //            // is running in a cluster.
      //            throw new RepositoryException(
      //                    "Missing child node entry for node with id: "
      //                    + node.getNodeId());
      //        }
      InternalQName name = node.getQPath().getName();
      addNodeName(doc, name.getNamespace(), name.getName());
   }
}